From eccfb715f64b3764bf177a7cd547b8395ee4fb09 Mon Sep 17 00:00:00 2001
From: wangxiyuan
Date: Wed, 20 Aug 2025 16:26:07 +0800
Subject: [PATCH] [CI] Fix UT (#2452)

Make UT CI happy

- vLLM version: v0.10.0
- vLLM main: https://github.com/vllm-project/vllm/commit/d983769c41db224e0897fac2e9aefc5f57ad1122

---------

Signed-off-by: wangxiyuan
Signed-off-by: MengqingCao
Co-authored-by: MengqingCao
---
 tests/ut/core/test_scheduler.py              | 25 +++++++++++--------
 .../kv_connector/test_mooncake_connector.py  |  2 --
 tests/ut/kv_connector/utils.py               | 12 +++------
 tests/ut/torchair/test_utils.py              |  8 ++++--
 tests/ut/worker/test_input_batch.py          | 10 ++++----
 5 files changed, 30 insertions(+), 27 deletions(-)

diff --git a/tests/ut/core/test_scheduler.py b/tests/ut/core/test_scheduler.py
index 3c55a37..78b0c65 100644
--- a/tests/ut/core/test_scheduler.py
+++ b/tests/ut/core/test_scheduler.py
@@ -8,6 +8,8 @@ from vllm.config import (CacheConfig, KVTransferConfig, ModelConfig,
                          SchedulerConfig, SpeculativeConfig, VllmConfig)
 from vllm.multimodal.inputs import PlaceholderRange
 from vllm.sampling_params import SamplingParams
+from vllm.v1.core.kv_cache_utils import (get_request_block_hasher,
+                                         init_none_hash)
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
                                         KVCacheGroupSpec)
@@ -36,7 +38,10 @@ def create_requests(
     mm_positions: Optional[list[PlaceholderRange]] = None,
     max_tokens: int = 16,
     stop_token_ids: Optional[list[int]] = None,
+    block_size: int = 3,
+    hash_fn=hash,
 ):
+    init_none_hash(hash_fn)
     prompt_logprobs = PROMPT_LOGPROBS
     sampling_params = SamplingParams(ignore_eos=False,
                                      max_tokens=max_tokens,
@@ -46,16 +51,16 @@ def create_requests(
     for i in range(num_requests):
         mm_position = None
         mm_inputs = None
-        request = Request(
-            request_id=f"{i}",
-            prompt_token_ids=[i] * num_tokens,
-            sampling_params=sampling_params,
-            multi_modal_kwargs=mm_inputs,
-            multi_modal_placeholders=mm_position,
-            multi_modal_hashes=None,
-            eos_token_id=EOS_TOKEN_ID,
-            pooling_params=None,
-        )
+        request = Request(request_id=f"{i}",
+                          prompt_token_ids=[i] * num_tokens,
+                          sampling_params=sampling_params,
+                          multi_modal_kwargs=mm_inputs,
+                          multi_modal_placeholders=mm_position,
+                          multi_modal_hashes=None,
+                          eos_token_id=EOS_TOKEN_ID,
+                          pooling_params=None,
+                          block_hasher=get_request_block_hasher(
+                              block_size, hash_fn))
         requests.append(request)
     return requests
 
diff --git a/tests/ut/kv_connector/test_mooncake_connector.py b/tests/ut/kv_connector/test_mooncake_connector.py
index 9bca0dc..f6732a0 100644
--- a/tests/ut/kv_connector/test_mooncake_connector.py
+++ b/tests/ut/kv_connector/test_mooncake_connector.py
@@ -1152,8 +1152,6 @@ class TestMooncakeConnectorWorker(unittest.TestCase):
                   MagicMock()),
             patch.dict('sys.modules',
                        {'vllm_ascend.envs': self.envs_ascend_mock}),
-            patch('vllm_ascend.distributed.mooncake_connector.envs_ascend',
-                  self.envs_ascend_mock),
         ]
 
         for p in self.patches:
diff --git a/tests/ut/kv_connector/utils.py b/tests/ut/kv_connector/utils.py
index dd96c6b..9dc6dfc 100644
--- a/tests/ut/kv_connector/utils.py
+++ b/tests/ut/kv_connector/utils.py
@@ -55,7 +55,6 @@ def assert_scheduler_empty(scheduler: Scheduler):
 
 
 def create_vllm_config(
-        model: str = "facebook/opt-125m",
         max_num_seqs: int = 16,
         max_num_batched_tokens: int = 1024,
         block_size: int = 128,
@@ -66,14 +65,11 @@ def create_vllm_config(
         max_num_batched_tokens=max_num_batched_tokens,
         max_model_len=max_num_batched_tokens,
     )
+    fake_weight_path = os.path.join(os.path.dirname(__file__), "..",
+                                    "fake_weight")
     model_config = ModelConfig(
-        model=model,
-        task="auto",
-        tokenizer=model,
-        tokenizer_mode="auto",
-        trust_remote_code=True,
-        dtype="float16",
-        seed=42,
+        model=fake_weight_path,
+        skip_tokenizer_init=True,
     )
     # Cache config, optionally force APC
     cache_config = CacheConfig(
diff --git a/tests/ut/torchair/test_utils.py b/tests/ut/torchair/test_utils.py
index 367a9a4..1d65fd1 100644
--- a/tests/ut/torchair/test_utils.py
+++ b/tests/ut/torchair/test_utils.py
@@ -51,7 +51,7 @@ class TestTorchairUtils(TestBase):
         mock_model_registry.return_value = mock_registry
 
         utils.register_torchair_model()
-        self.assertEqual(mock_model_registry.register_model.call_count, 3)
+        self.assertEqual(mock_model_registry.register_model.call_count, 5)
 
         call_args_list = mock_model_registry.register_model.call_args_list
         expected_registrations = [
@@ -63,7 +63,11 @@ class TestTorchairUtils(TestBase):
              ),
             ("DeepseekV3ForCausalLM",
              "vllm_ascend.torchair.models.torchair_deepseek_v3:TorchairDeepseekV3ForCausalLM"
-             )
+             ),
+            ("Qwen2ForCausalLM",
+             "vllm_ascend.torchair.models.qwen2:CustomQwen2ForCausalLM"),
+            ("Qwen3ForCausalLM",
+             "vllm_ascend.torchair.models.qwen3_moe:CustomQwen3MoeForCausalLM")
         ]
 
         for i, (expected_name,
diff --git a/tests/ut/worker/test_input_batch.py b/tests/ut/worker/test_input_batch.py
index 3914f96..320725c 100644
--- a/tests/ut/worker/test_input_batch.py
+++ b/tests/ut/worker/test_input_batch.py
@@ -20,7 +20,7 @@ import numpy as np
 import pytest
 import torch
 from vllm.sampling_params import SamplingParams
-from vllm.utils import is_pin_memory_available, make_tensor_with_pad
+from vllm.utils import make_tensor_with_pad
 from vllm.v1.pool.metadata import PoolingMetadata
 from vllm.v1.sample.logits_processor import LogitsProcessors
 from vllm.v1.sample.metadata import SamplingMetadata
@@ -237,7 +237,7 @@ def test_sampling_metadata_in_input_batch(device: str, batch_size: int):
         max_model_len=1024,
         max_num_batched_tokens=1024,
         device=torch.device(device),
-        pin_memory=is_pin_memory_available(),
+        pin_memory=False,
         vocab_size=1024,
         block_sizes=[1],
     )
@@ -298,7 +298,7 @@ def test_sampling_metadata_in_input_batch(device: str, batch_size: int):
     assert (expected_sampling_metadata.output_token_ids ==
             sampling_metadata.output_token_ids)
     assert expected_sampling_metadata.no_penalties == \
-        sampling_metadata.no_penalties
+           sampling_metadata.no_penalties
     if sampling_metadata.allowed_token_ids_mask:
         assert torch.allclose(
             expected_sampling_metadata.allowed_token_ids_mask,
@@ -328,7 +328,7 @@ def test_swap_states_in_input_batch(device: str, batch_size: int,
         max_model_len=1024,
         max_num_batched_tokens=1024,
         device=torch.device(device),
-        pin_memory=is_pin_memory_available(),
+        pin_memory=False,
         vocab_size=1024,
         block_sizes=[1],
     )
@@ -337,7 +337,7 @@ def test_swap_states_in_input_batch(device: str, batch_size: int,
         max_model_len=1024,
         max_num_batched_tokens=1024,
         device=torch.device(device),
-        pin_memory=is_pin_memory_available(),
+        pin_memory=False,
         vocab_size=1024,
         block_sizes=[1],
     )