Sync from v0.13

This commit is contained in:
2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions

View File

@@ -0,0 +1,4 @@
# Option fixture that deliberately carries no `model` key.
# NOTE(review): presumably loaded by the tests through a config-file
# argument — confirm against the test that reads it.
port: 12312
served_model_name: mymodel
tensor_parallel_size: 2
trust_remote_code: true

View File

@@ -0,0 +1,75 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from vllm.engine.arg_utils import EngineArgs
from vllm.model_executor.layers.quantization.quark.utils import deep_compare
def test_cuda_empty_vs_unset_configs(monkeypatch: pytest.MonkeyPatch):
    """Test that configs created with normal (untouched) CUDA_VISIBLE_DEVICES
    and CUDA_VISIBLE_DEVICES="" are equivalent. This ensures consistent
    behavior regardless of whether GPU visibility is disabled via empty string
    or left in its normal state.

    Args:
        monkeypatch: pytest fixture used to set ``CUDA_VISIBLE_DEVICES`` to an
            empty string for the duration of the second config build only.
    """

    def create_config():
        engine_args = EngineArgs(
            model="deepseek-ai/DeepSeek-V2-Lite", trust_remote_code=True
        )
        return engine_args.create_engine_config()

    def comparable_fields(config) -> dict:
        # ``vars`` returns the live ``__dict__`` of the config, so popping
        # from it would strip the attribute off the config object itself.
        # Build a filtered copy instead. ``instance_id`` is excluded because
        # it is expected to differ between two independently built configs.
        return {
            name: value
            for name, value in vars(config).items()
            if name != "instance_id"
        }

    # Create config with CUDA_VISIBLE_DEVICES set normally
    normal_config = create_config()

    # Create config with CUDA_VISIBLE_DEVICES=""
    with monkeypatch.context() as m:
        m.setenv("CUDA_VISIBLE_DEVICES", "")
        empty_config = create_config()

    assert deep_compare(
        comparable_fields(normal_config), comparable_fields(empty_config)
    ), (
        'Configs with normal CUDA_VISIBLE_DEVICES and CUDA_VISIBLE_DEVICES=""'
        " should be equivalent"
    )
def test_ray_runtime_env(monkeypatch: pytest.MonkeyPatch):
    """Check that the parallel config captures the Ray runtime env.

    Built in the test process, ``parallel_config.ray_runtime_env`` must be
    ``None``. Built inside a Ray task launched with a ``runtime_env``, it must
    be populated and expose the ``TEST_ENV_VAR`` entry that was passed in.

    Args:
        monkeypatch: pytest fixture; not used by the body, kept so the test
            signature is unchanged.
    """

    # In testing, this method needs to be nested inside as ray does not
    # see the test module.
    def create_config():
        engine_args = EngineArgs(
            model="deepseek-ai/DeepSeek-V2-Lite", trust_remote_code=True
        )
        return engine_args.create_engine_config()

    config = create_config()
    parallel_config = config.parallel_config
    assert parallel_config.ray_runtime_env is None

    import ray

    ray.init()
    try:
        runtime_env = {
            "env_vars": {
                "TEST_ENV_VAR": "test_value",
            },
        }

        config_ref = (
            ray.remote(create_config).options(runtime_env=runtime_env).remote()
        )

        config = ray.get(config_ref)
        parallel_config = config.parallel_config
        assert parallel_config.ray_runtime_env is not None
        assert (
            parallel_config.ray_runtime_env.env_vars().get("TEST_ENV_VAR")
            == "test_value"
        )
    finally:
        # Always tear Ray down, even when the remote call or an assertion
        # fails, so a failure here does not leave a running Ray instance
        # behind for the rest of the test session.
        ray.shutdown()

View File

@@ -0,0 +1,166 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from dataclasses import dataclass
from enum import Enum
import pytest
from vllm.config.utils import get_hash_factors, hash_factors, normalize_value
# Helpers
def endswith_fqname(obj, suffix: str) -> bool:
    """Return True when ``normalize_value(obj)`` is a string ending in *suffix*.

    Only the tail of the normalized name is compared so the check does not
    depend on the exact import path of the object.
    """
    normalized = normalize_value(obj)
    if not isinstance(normalized, str):
        return False
    return normalized.endswith(suffix)
def expected_path(p_str: str = ".") -> str:
    """Return *p_str* user-expanded, resolved, and rendered with forward slashes."""
    import pathlib

    resolved = pathlib.Path(p_str).expanduser().resolve()
    return resolved.as_posix()
# Minimal dataclass to test get_hash_factors.
# Avoid importing heavy vLLM configs.
@dataclass
class SimpleConfig:
a: object
b: object | None = None
class DummyLogprobsMode(Enum):
    """Single-member enum with a string value, used by the normalization tests."""

    RAW_LOGITS = "raw_logits"
def test_hash_factors_deterministic():
    """hash_factors is repeatable, key-order independent, and 64 hex chars long."""
    base_factors = {"a": 1, "b": "test"}

    digest = hash_factors(base_factors)
    repeat_digest = hash_factors(base_factors)
    assert digest == repeat_digest

    # Same mapping, opposite insertion order: the digest must not change.
    swapped_factors = {"b": "test", "a": 1}
    assert hash_factors(swapped_factors) == digest

    # Expect 64 lowercase hexadecimal characters.
    assert len(digest) == 64
    assert set(digest) <= set("0123456789abcdef")
@pytest.mark.parametrize(
    ("inp", "expected"),
    [
        # Primitives are expected to pass through unchanged.
        (None, None),
        (True, True),
        (1, 1),
        (1.0, 1.0),
        ("x", "x"),
        # Byte strings are expected as hex text.
        (b"ab", "6162"),
        (bytearray(b"ab"), "6162"),
        # Lists are expected as tuples; dicts as key-sorted item tuples.
        ([1, 2], (1, 2)),
        ({"b": 2, "a": 1}, (("a", 1), ("b", 2))),
    ],
)
def test_normalize_value_matrix(inp, expected):
    """Table-driven check of normalize_value on plain inputs."""
    actual = normalize_value(inp)
    assert actual == expected
def test_normalize_value_enum():
    """An Enum member normalizes to a tuple of (type name, member value)."""
    normalized = normalize_value(DummyLogprobsMode.RAW_LOGITS)
    assert isinstance(normalized, tuple)

    # First slot: a name ending in the enum class name.
    type_name = normalized[0]
    assert type_name.endswith("DummyLogprobsMode")

    # Second slot: the member's string value.
    payload = normalized[1]
    assert payload == "raw_logits"
def test_normalize_value_set_order_insensitive():
    """Sets written in different element orders normalize to the same value."""
    # NOTE: the two literals build equal sets; only the written order differs.
    shuffled = normalize_value({3, 1, 2})
    ordered = normalize_value({1, 2, 3})
    assert shuffled == ordered
def test_normalize_value_path_normalization():
    """A relative Path normalizes to the string produced by expected_path."""
    from pathlib import Path  # local import to avoid global dependency

    # The expected value is the expanded, resolved form, so the result does
    # not depend on how the path was spelled.
    here = Path(".")
    normalized = normalize_value(here)
    assert normalized == expected_path(".")
def test_normalize_value_uuid_and_to_json():
    """Objects exposing uuid() or to_json_string() normalize to that method's result."""

    class HasUUID:
        def uuid(self):
            return "test-uuid"

    class ToJson:
        def to_json_string(self):
            return '{"x":1}'

    uuid_result = normalize_value(HasUUID())
    assert uuid_result == "test-uuid"

    json_result = normalize_value(ToJson())
    assert json_result == '{"x":1}'
@pytest.mark.parametrize(
    ("bad",),
    [
        # A plain lambda.
        (lambda x: x,),
        # An instance of a class that defines __call__.
        ((type("CallableInstance", (), {"__call__": lambda self: 0}))(),),
        # A lambda produced by calling another lambda.
        ((lambda: (lambda: 0))(),),
    ],
)
def test_error_cases(bad):
    """normalize_value raises TypeError for functions and callable instances."""
    # Callables are rejected to avoid under-hashing.
    with pytest.raises(TypeError):
        normalize_value(bad)
def test_enum_vs_int_disambiguation():
    """A plain int and an Enum member must not share normalized form or hash."""
    # A bare int is returned as-is.
    assert normalize_value(1) == 1

    # An Enum member becomes a two-element tuple: (type name, value).
    tagged = normalize_value(DummyLogprobsMode.RAW_LOGITS)
    assert isinstance(tagged, tuple) and len(tagged) == 2
    type_name, payload = tagged
    assert type_name.endswith(".DummyLogprobsMode")
    assert payload == "raw_logits"

    # Factor dicts from two configs that differ only in the type of ``a``.
    # NOTE(review): the empty set is presumably the set of fields to ignore —
    # confirm against get_hash_factors.
    int_factors = get_hash_factors(SimpleConfig(1), set())
    enum_factors = get_hash_factors(
        SimpleConfig(DummyLogprobsMode.RAW_LOGITS), set()
    )

    # The int stays a primitive; the enum is stored as a tagged tuple.
    assert int_factors["a"] == 1
    enum_entry = enum_factors["a"]
    assert isinstance(enum_entry, tuple) and enum_entry[1] == "raw_logits"

    # The factor dicts, and therefore their digests, must differ.
    assert int_factors != enum_factors
    assert hash_factors(int_factors) != hash_factors(enum_factors)

    # Each digest is a 64-character string.
    int_digest = hash_factors(int_factors)
    enum_digest = hash_factors(enum_factors)
    assert isinstance(int_digest, str) and len(int_digest) == 64
    assert isinstance(enum_digest, str) and len(enum_digest) == 64
def test_classes_are_types():
    """Classes normalize to a string name, for a builtin and a local class."""
    # A builtin class normalizes to some string.
    builtin_name = normalize_value(str)
    assert isinstance(builtin_name, str)

    class LocalDummy:
        pass

    # A class defined in this function normalizes to a name ending in its own name.
    assert endswith_fqname(LocalDummy, ".LocalDummy")

View File

@@ -0,0 +1,6 @@
# Same as test_config.yaml but with model specified
# (the only additional key relative to that file is `model`).
model: config-model
port: 12312
served_model_name: mymodel
tensor_parallel_size: 2
trust_remote_code: true

View File

@@ -0,0 +1,53 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import sys
from unittest.mock import patch
from vllm.config import VllmConfig
from vllm.engine.arg_utils import AsyncEngineArgs
from vllm.v1.engine.async_llm import AsyncLLM
def test_mp_reducer():
    """
    Test that _reduce_config reducer is registered when AsyncLLM is instantiated
    without transformers_modules. This is a regression test for
    https://github.com/vllm-project/vllm/pull/18640.

    ``multiprocessing.reducer.register`` is patched with a mock, and the
    recorded calls are searched for one whose first positional argument is
    ``VllmConfig`` and whose second is callable.
    """

    # Ensure transformers_modules is not in sys.modules
    sys.modules.pop("transformers_modules", None)

    with patch("multiprocessing.reducer.register") as mock_register:
        engine_args = AsyncEngineArgs(
            model="facebook/opt-125m",
            max_model_len=32,
            gpu_memory_utilization=0.1,
            disable_log_stats=True,
        )

        async_llm = AsyncLLM.from_engine_args(
            engine_args,
            start_engine_loop=False,
        )

        try:
            assert mock_register.called, (
                "multiprocessing.reducer.register should have been called"
            )

            vllm_config_registered = False
            for registration in mock_register.call_args_list:
                positional = registration.args
                # Verify that a reducer for VllmConfig was registered
                if len(positional) >= 2 and positional[0] == VllmConfig:
                    vllm_config_registered = True

                    reducer_func = positional[1]
                    assert callable(reducer_func), (
                        "Reducer function should be callable"
                    )
                    break

            assert vllm_config_registered, (
                "VllmConfig should have been registered to multiprocessing.reducer"
            )
        finally:
            # Shut the engine down even when an assertion above fails, so a
            # failing run does not leave the engine running.
            async_llm.shutdown()

View File

@@ -0,0 +1,25 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from vllm.attention.backends.registry import AttentionBackendEnum
from vllm.config.multimodal import MultiModalConfig
def test_mm_encoder_attn_backend_str_conversion():
    """A backend given by its string name compares equal to the enum member."""
    mm_config = MultiModalConfig(mm_encoder_attn_backend="FLASH_ATTN")
    resolved_backend = mm_config.mm_encoder_attn_backend
    assert resolved_backend == AttentionBackendEnum.FLASH_ATTN
def test_mm_encoder_attn_backend_invalid():
    """An unrecognized backend name makes the constructor raise ValueError."""
    unknown_backend = "not_a_backend"
    with pytest.raises(ValueError):
        MultiModalConfig(mm_encoder_attn_backend=unknown_backend)
def test_mm_encoder_attn_backend_hash_updates():
    """Setting mm_encoder_attn_backend changes the result of compute_hash()."""
    default_config = MultiModalConfig()
    base_hash = default_config.compute_hash()

    overridden_config = MultiModalConfig(
        mm_encoder_attn_backend=AttentionBackendEnum.FLASH_ATTN
    )
    overridden_hash = overridden_config.compute_hash()

    assert base_hash != overridden_hash