Sync from v0.13
This commit is contained in:
4
tests/config/test_config.yaml
Normal file
4
tests/config/test_config.yaml
Normal file
@@ -0,0 +1,4 @@
|
||||
# Test configuration values consumed by the config-file tests in tests/config/.
port: 12312
served_model_name: mymodel
tensor_parallel_size: 2
trust_remote_code: true
|
||||
75
tests/config/test_config_generation.py
Normal file
75
tests/config/test_config_generation.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import pytest
|
||||
|
||||
from vllm.engine.arg_utils import EngineArgs
|
||||
from vllm.model_executor.layers.quantization.quark.utils import deep_compare
|
||||
|
||||
|
||||
def test_cuda_empty_vs_unset_configs(monkeypatch: pytest.MonkeyPatch):
    """Configs built with CUDA_VISIBLE_DEVICES left untouched and with
    CUDA_VISIBLE_DEVICES="" must be equivalent, so disabling GPU visibility
    via an empty string behaves the same as leaving the variable alone.
    """

    def build_config():
        return EngineArgs(
            model="deepseek-ai/DeepSeek-V2-Lite", trust_remote_code=True
        ).create_engine_config()

    # Baseline: environment left exactly as the test runner provides it.
    baseline = build_config()

    # Same construction, but with CUDA_VISIBLE_DEVICES="".
    with monkeypatch.context() as m:
        m.setenv("CUDA_VISIBLE_DEVICES", "")
        masked = build_config()

    baseline_dict = vars(baseline)
    masked_dict = vars(masked)

    # instance_id legitimately differs between the two configs; drop it
    # before comparing.
    baseline_dict.pop("instance_id", None)
    masked_dict.pop("instance_id", None)

    assert deep_compare(baseline_dict, masked_dict), (
        'Configs with normal CUDA_VISIBLE_DEVICES and CUDA_VISIBLE_DEVICES=""'
        " should be equivalent"
    )
|
||||
|
||||
|
||||
def test_ray_runtime_env(monkeypatch: pytest.MonkeyPatch):
    """Verify that a Ray ``runtime_env`` propagates into ParallelConfig.

    A config created locally carries no runtime env; one created inside a
    Ray task launched with an explicit ``runtime_env`` must expose it via
    ``parallel_config.ray_runtime_env``.
    """

    # In testing, this method needs to be nested inside as ray does not
    # see the test module.
    def create_config():
        engine_args = EngineArgs(
            model="deepseek-ai/DeepSeek-V2-Lite", trust_remote_code=True
        )
        return engine_args.create_engine_config()

    config = create_config()
    parallel_config = config.parallel_config
    assert parallel_config.ray_runtime_env is None

    import ray

    ray.init()
    # FIX: tear the Ray cluster down even when an assertion fails, so a
    # failure here cannot leak an initialized Ray runtime into later tests.
    try:
        runtime_env = {
            "env_vars": {
                "TEST_ENV_VAR": "test_value",
            },
        }

        config_ref = (
            ray.remote(create_config).options(runtime_env=runtime_env).remote()
        )

        config = ray.get(config_ref)
        parallel_config = config.parallel_config
        assert parallel_config.ray_runtime_env is not None
        assert (
            parallel_config.ray_runtime_env.env_vars().get("TEST_ENV_VAR")
            == "test_value"
        )
    finally:
        ray.shutdown()
|
||||
166
tests/config/test_config_utils.py
Normal file
166
tests/config/test_config_utils.py
Normal file
@@ -0,0 +1,166 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.config.utils import get_hash_factors, hash_factors, normalize_value
|
||||
|
||||
# Helpers
|
||||
|
||||
|
||||
def endswith_fqname(obj, suffix: str) -> bool:
    """Return True if normalize_value(obj) is a string ending in *suffix*.

    normalize_value(type) yields a fully-qualified name; matching only the
    suffix keeps the check independent of brittle import paths.
    """
    normalized = normalize_value(obj)
    if not isinstance(normalized, str):
        return False
    return normalized.endswith(suffix)
|
||||
|
||||
|
||||
def expected_path(p_str: str = ".") -> str:
    """Canonical absolute POSIX form of *p_str* (expanduser + resolve)."""
    import pathlib

    return pathlib.Path(p_str).expanduser().resolve().as_posix()
|
||||
|
||||
|
||||
# Minimal dataclass to test get_hash_factors.
|
||||
# Avoid importing heavy vLLM configs.
|
||||
@dataclass
|
||||
class SimpleConfig:
|
||||
a: object
|
||||
b: object | None = None
|
||||
|
||||
|
||||
class DummyLogprobsMode(Enum):
    """Minimal Enum with a string payload for normalize_value tests."""

    RAW_LOGITS = "raw_logits"
|
||||
|
||||
|
||||
def test_hash_factors_deterministic():
    """hash_factors must yield a stable, order-insensitive SHA-256 digest."""
    factors = {"a": 1, "b": "test"}
    first = hash_factors(factors)
    second = hash_factors(factors)

    assert first == second
    # Dict key insertion order should not affect the hash.
    assert hash_factors({"b": "test", "a": 1}) == first
    # A SHA-256 hex digest is 64 lowercase hex characters.
    assert len(first) == 64
    assert set(first) <= set("0123456789abcdef")
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "inp, expected",
    [
        (None, None),
        (True, True),
        (1, 1),
        (1.0, 1.0),
        ("x", "x"),
        (b"ab", "6162"),
        (bytearray(b"ab"), "6162"),
        ([1, 2], (1, 2)),
        ({"b": 2, "a": 1}, (("a", 1), ("b", 2))),
    ],
)
def test_normalize_value_matrix(inp, expected):
    """Table-driven check: each input maps to its canonical normalized form."""
    actual = normalize_value(inp)
    assert actual == expected
|
||||
|
||||
|
||||
def test_normalize_value_enum():
    """Enums normalize to a (module.QualName, payload) pair.

    DummyLogprobsMode carries a string payload.
    """
    normalized = normalize_value(DummyLogprobsMode.RAW_LOGITS)
    assert isinstance(normalized, tuple)
    # First element is the fully-qualified enum type name.
    assert normalized[0].endswith("DummyLogprobsMode")
    # Second element is the string payload 'raw_logits'.
    assert normalized[1] == "raw_logits"
|
||||
|
||||
|
||||
def test_normalize_value_set_order_insensitive():
    """Sets are unordered, so normalization must sort for determinism."""
    left = normalize_value({3, 1, 2})
    right = normalize_value({1, 2, 3})
    assert left == right
|
||||
|
||||
|
||||
def test_normalize_value_path_normalization():
    """Paths expand/resolve to absolute strings, stabilizing hashes
    across working directories."""
    from pathlib import Path  # local import to avoid global dependency

    assert normalize_value(Path(".")) == expected_path(".")
|
||||
|
||||
|
||||
def test_normalize_value_uuid_and_to_json():
    """Objects may normalize through a uuid() or to_json_string() hook."""

    class WithUuid:
        def uuid(self):
            return "test-uuid"

    class WithJson:
        def to_json_string(self):
            return '{"x":1}'

    assert normalize_value(WithUuid()) == "test-uuid"
    assert normalize_value(WithJson()) == '{"x":1}'
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "bad",
    [
        (lambda x: x),
        (type("CallableInstance", (), {"__call__": lambda self: 0}))(),
        (lambda: (lambda: 0))(),  # nested function instance
    ],
)
def test_error_cases(bad):
    """Inputs expected to raise TypeError."""
    # Functions, lambdas, and callable instances are rejected so that
    # behavior-bearing objects are never silently under-hashed.
    with pytest.raises(TypeError):
        normalize_value(bad)
|
||||
|
||||
|
||||
def test_enum_vs_int_disambiguation():
    """An enum payload must never hash-collide with an equal primitive."""
    # A plain int passes through untouched.
    assert normalize_value(1) == 1

    # An enum becomes a ("module.QualName", value) pair.
    normalized_enum = normalize_value(DummyLogprobsMode.RAW_LOGITS)
    assert isinstance(normalized_enum, tuple) and len(normalized_enum) == 2
    qualname, payload = normalized_enum
    assert qualname.endswith(".DummyLogprobsMode")
    assert payload == "raw_logits"

    # Factor dicts from an int-valued vs enum-valued config.
    int_factors = get_hash_factors(SimpleConfig(1), set())
    enum_factors = get_hash_factors(SimpleConfig(DummyLogprobsMode.RAW_LOGITS), set())
    # The int case stays a primitive value.
    assert int_factors["a"] == 1
    # The enum case becomes the tagged tuple ("module.QualName", "raw_logits").
    assert isinstance(enum_factors["a"], tuple)
    assert enum_factors["a"][1] == "raw_logits"
    # Factor dicts must differ so primitives never collide with Enums,
    # and the digests must differ correspondingly.
    assert int_factors != enum_factors
    assert hash_factors(int_factors) != hash_factors(enum_factors)

    # Digests are stable 64-character hex strings.
    int_digest = hash_factors(int_factors)
    enum_digest = hash_factors(enum_factors)
    assert isinstance(int_digest, str) and len(int_digest) == 64
    assert isinstance(enum_digest, str) and len(enum_digest) == 64
|
||||
|
||||
|
||||
def test_classes_are_types():
    """Types normalize to fully-qualified names.

    Only classes are allowed; functions and lambdas are rejected elsewhere.
    """
    normalized = normalize_value(str)
    assert isinstance(normalized, str)

    class LocalDummy:
        pass

    assert endswith_fqname(LocalDummy, ".LocalDummy")
|
||||
6
tests/config/test_config_with_model.yaml
Normal file
6
tests/config/test_config_with_model.yaml
Normal file
@@ -0,0 +1,6 @@
|
||||
# Same as test_config.yaml but with model specified
model: config-model
port: 12312
served_model_name: mymodel
tensor_parallel_size: 2
trust_remote_code: true
|
||||
53
tests/config/test_mp_reducer.py
Normal file
53
tests/config/test_mp_reducer.py
Normal file
@@ -0,0 +1,53 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import sys
|
||||
from unittest.mock import patch
|
||||
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.engine.arg_utils import AsyncEngineArgs
|
||||
from vllm.v1.engine.async_llm import AsyncLLM
|
||||
|
||||
|
||||
def test_mp_reducer():
    """
    Test that _reduce_config reducer is registered when AsyncLLM is instantiated
    without transformers_modules. This is a regression test for
    https://github.com/vllm-project/vllm/pull/18640.
    """

    # Ensure transformers_modules is not in sys.modules
    sys.modules.pop("transformers_modules", None)

    with patch("multiprocessing.reducer.register") as mock_register:
        engine_args = AsyncEngineArgs(
            model="facebook/opt-125m",
            max_model_len=32,
            gpu_memory_utilization=0.1,
            disable_log_stats=True,
        )

        async_llm = AsyncLLM.from_engine_args(
            engine_args,
            start_engine_loop=False,
        )

        # FIX: shut the engine down even when an assertion below fails, so a
        # failing check cannot leak engine resources into subsequent tests.
        try:
            assert mock_register.called, (
                "multiprocessing.reducer.register should have been called"
            )

            vllm_config_registered = False
            for call_args in mock_register.call_args_list:
                # Verify that a reducer for VllmConfig was registered
                if len(call_args[0]) >= 2 and call_args[0][0] == VllmConfig:
                    vllm_config_registered = True

                    reducer_func = call_args[0][1]
                    assert callable(reducer_func), (
                        "Reducer function should be callable"
                    )
                    break

            assert vllm_config_registered, (
                "VllmConfig should have been registered to multiprocessing.reducer"
            )
        finally:
            async_llm.shutdown()
|
||||
25
tests/config/test_multimodal_config.py
Normal file
25
tests/config/test_multimodal_config.py
Normal file
@@ -0,0 +1,25 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.attention.backends.registry import AttentionBackendEnum
|
||||
from vllm.config.multimodal import MultiModalConfig
|
||||
|
||||
|
||||
def test_mm_encoder_attn_backend_str_conversion():
    """A backend given as a string is coerced to its enum member."""
    cfg = MultiModalConfig(mm_encoder_attn_backend="FLASH_ATTN")
    assert cfg.mm_encoder_attn_backend == AttentionBackendEnum.FLASH_ATTN
|
||||
|
||||
|
||||
def test_mm_encoder_attn_backend_invalid():
    """An unknown backend name must be rejected with ValueError."""
    with pytest.raises(ValueError):
        MultiModalConfig(mm_encoder_attn_backend="not_a_backend")
|
||||
|
||||
|
||||
def test_mm_encoder_attn_backend_hash_updates():
    """Overriding the encoder attention backend must change the config hash."""
    default_hash = MultiModalConfig().compute_hash()
    override_hash = MultiModalConfig(
        mm_encoder_attn_backend=AttentionBackendEnum.FLASH_ATTN
    ).compute_hash()
    assert default_hash != override_hash
|
||||
Reference in New Issue
Block a user