[CI] Fix UT (#2452)
Make UT CI happy
- vLLM version: v0.10.0
- vLLM main: d983769c41
---------
Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Signed-off-by: MengqingCao <cmq0113@163.com>
Co-authored-by: MengqingCao <cmq0113@163.com>
@@ -8,6 +8,8 @@ from vllm.config import (CacheConfig, KVTransferConfig, ModelConfig,
                          SchedulerConfig, SpeculativeConfig, VllmConfig)
 from vllm.multimodal.inputs import PlaceholderRange
 from vllm.sampling_params import SamplingParams
+from vllm.v1.core.kv_cache_utils import (get_request_block_hasher,
+                                         init_none_hash)
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig,
                                         KVCacheGroupSpec)
@@ -36,7 +38,10 @@ def create_requests(
     mm_positions: Optional[list[PlaceholderRange]] = None,
     max_tokens: int = 16,
     stop_token_ids: Optional[list[int]] = None,
+    block_size: int = 3,
+    hash_fn=hash,
 ):
+    init_none_hash(hash_fn)
     prompt_logprobs = PROMPT_LOGPROBS
     sampling_params = SamplingParams(ignore_eos=False,
                                      max_tokens=max_tokens,
@@ -46,16 +51,16 @@ def create_requests(
     for i in range(num_requests):
         mm_position = None
         mm_inputs = None
-        request = Request(
-            request_id=f"{i}",
-            prompt_token_ids=[i] * num_tokens,
-            sampling_params=sampling_params,
-            multi_modal_kwargs=mm_inputs,
-            multi_modal_placeholders=mm_position,
-            multi_modal_hashes=None,
-            eos_token_id=EOS_TOKEN_ID,
-            pooling_params=None,
-        )
+        request = Request(request_id=f"{i}",
+                          prompt_token_ids=[i] * num_tokens,
+                          sampling_params=sampling_params,
+                          multi_modal_kwargs=mm_inputs,
+                          multi_modal_placeholders=mm_position,
+                          multi_modal_hashes=None,
+                          eos_token_id=EOS_TOKEN_ID,
+                          pooling_params=None,
+                          block_hasher=get_request_block_hasher(
+                              block_size, hash_fn))
         requests.append(request)
     return requests
 
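Taken together, the three hunks above thread a `block_hasher` through the test's `create_requests()` helper. As a standalone illustration, the sketch below builds one `Request` the same way; it assumes `Request` lives at `vllm.v1.request` (the test's own import for it is not shown here), while the `get_request_block_hasher` and `init_none_hash` calls match the imports added in the first hunk. The helper name `make_request` is hypothetical.

```python
from vllm.sampling_params import SamplingParams
from vllm.v1.core.kv_cache_utils import (get_request_block_hasher,
                                         init_none_hash)
from vllm.v1.request import Request


def make_request(request_id: str,
                 prompt_token_ids: list[int],
                 block_size: int = 3,
                 hash_fn=hash) -> Request:
    # init_none_hash() seeds the sentinel hash and must run before
    # any block hashing takes place.
    init_none_hash(hash_fn)
    return Request(request_id=request_id,
                   prompt_token_ids=prompt_token_ids,
                   sampling_params=SamplingParams(max_tokens=16),
                   multi_modal_kwargs=None,
                   multi_modal_placeholders=None,
                   multi_modal_hashes=None,
                   eos_token_id=None,
                   pooling_params=None,
                   block_hasher=get_request_block_hasher(
                       block_size, hash_fn))
```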
@@ -1152,8 +1152,6 @@ class TestMooncakeConnectorWorker(unittest.TestCase):
                   MagicMock()),
             patch.dict('sys.modules',
                        {'vllm_ascend.envs': self.envs_ascend_mock}),
-            patch('vllm_ascend.distributed.mooncake_connector.envs_ascend',
-                  self.envs_ascend_mock),
         ]
 
         for p in self.patches:
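The hunk above drops a now-unneeded `envs_ascend` patch; the surrounding start/stop pattern is plain `unittest.mock`. A minimal, self-contained version of that pattern (the module name `some_pkg.envs` is a placeholder, not the real patch target):

```python
import sys
import unittest
from unittest.mock import MagicMock, patch


class ExampleTest(unittest.TestCase):

    def setUp(self):
        self.envs_mock = MagicMock()
        # patch.dict on sys.modules makes "import some_pkg.envs"
        # resolve to the mock for the duration of the test.
        self.patches = [
            patch.dict(sys.modules, {"some_pkg.envs": self.envs_mock}),
        ]
        for p in self.patches:
            p.start()

    def tearDown(self):
        # Unwind in reverse order so nested patches detach cleanly.
        for p in reversed(self.patches):
            p.stop()
```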
@@ -55,7 +55,6 @@ def assert_scheduler_empty(scheduler: Scheduler):
 
 
 def create_vllm_config(
-    model: str = "facebook/opt-125m",
     max_num_seqs: int = 16,
     max_num_batched_tokens: int = 1024,
     block_size: int = 128,
@@ -66,14 +65,11 @@ def create_vllm_config(
         max_num_batched_tokens=max_num_batched_tokens,
         max_model_len=max_num_batched_tokens,
     )
+    fake_weight_path = os.path.join(os.path.dirname(__file__), "..",
+                                    "fake_weight")
     model_config = ModelConfig(
-        model=model,
-        task="auto",
-        tokenizer=model,
-        tokenizer_mode="auto",
-        trust_remote_code=True,
-        dtype="float16",
-        seed=42,
+        model=fake_weight_path,
+        skip_tokenizer_init=True,
     )
     # Cache config, optionally force APC
     cache_config = CacheConfig(
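Pointing `ModelConfig` at a checked-in `fake_weight` directory (and skipping tokenizer init) keeps the fixture offline: no Hugging Face Hub download, no tokenizer files. A condensed sketch of the same idea, assuming only the `ModelConfig` arguments visible in the hunk:

```python
import os

from vllm.config import ModelConfig

# Resolve the dummy checkpoint relative to the test file so the suite
# never touches the network or the HF cache.
fake_weight_path = os.path.join(os.path.dirname(__file__), "..",
                                "fake_weight")
model_config = ModelConfig(
    model=fake_weight_path,    # local path instead of "facebook/opt-125m"
    skip_tokenizer_init=True,  # these UTs never tokenize
)
```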
@@ -51,7 +51,7 @@ class TestTorchairUtils(TestBase):
         mock_model_registry.return_value = mock_registry
         utils.register_torchair_model()
 
-        self.assertEqual(mock_model_registry.register_model.call_count, 3)
+        self.assertEqual(mock_model_registry.register_model.call_count, 5)
         call_args_list = mock_model_registry.register_model.call_args_list
 
         expected_registrations = [
@@ -63,7 +63,11 @@ class TestTorchairUtils(TestBase):
             ),
             ("DeepseekV3ForCausalLM",
              "vllm_ascend.torchair.models.torchair_deepseek_v3:TorchairDeepseekV3ForCausalLM"
-            )
+            ),
+            ("Qwen2ForCausalLM",
+             "vllm_ascend.torchair.models.qwen2:CustomQwen2ForCausalLM"),
+            ("Qwen3ForCausalLM",
+             "vllm_ascend.torchair.models.qwen3_moe:CustomQwen3MoeForCausalLM")
         ]
 
         for i, (expected_name,
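The jump from 3 to 5 expected registrations matches the two Qwen entries added above. For reference, vLLM's registry accepts a lazy `"module:ClassName"` string, so the production side presumably does something along these lines (a sketch; only the architecture/path pairs are taken from the hunk):

```python
from vllm import ModelRegistry

# The string form defers the import until the model is instantiated.
_TORCHAIR_QWEN_MODELS = {
    "Qwen2ForCausalLM":
    "vllm_ascend.torchair.models.qwen2:CustomQwen2ForCausalLM",
    "Qwen3ForCausalLM":
    "vllm_ascend.torchair.models.qwen3_moe:CustomQwen3MoeForCausalLM",
}

for arch, path in _TORCHAIR_QWEN_MODELS.items():
    ModelRegistry.register_model(arch, path)
```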
@@ -20,7 +20,7 @@ import numpy as np
 import pytest
 import torch
 from vllm.sampling_params import SamplingParams
-from vllm.utils import is_pin_memory_available, make_tensor_with_pad
+from vllm.utils import make_tensor_with_pad
 from vllm.v1.pool.metadata import PoolingMetadata
 from vllm.v1.sample.logits_processor import LogitsProcessors
 from vllm.v1.sample.metadata import SamplingMetadata
@@ -237,7 +237,7 @@ def test_sampling_metadata_in_input_batch(device: str, batch_size: int):
         max_model_len=1024,
         max_num_batched_tokens=1024,
         device=torch.device(device),
-        pin_memory=is_pin_memory_available(),
+        pin_memory=False,
         vocab_size=1024,
         block_sizes=[1],
     )
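Replacing `is_pin_memory_available()` with a hardcoded `pin_memory=False` avoids probing for an accelerator inside CPU-only CI runners. The trade-off is the standard one for pinned (page-locked) host memory, e.g.:

```python
import torch

# Pageable host memory works everywhere, including CPU-only CI.
buf = torch.empty(1024, dtype=torch.float16, pin_memory=False)

# Pinned memory enables faster async host-to-device copies, but
# allocating it requires a working accelerator runtime:
# buf = torch.empty(1024, dtype=torch.float16, pin_memory=True)
```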
@@ -298,7 +298,7 @@ def test_sampling_metadata_in_input_batch(device: str, batch_size: int):
     assert (expected_sampling_metadata.output_token_ids ==
             sampling_metadata.output_token_ids)
     assert expected_sampling_metadata.no_penalties == \
         sampling_metadata.no_penalties
     if sampling_metadata.allowed_token_ids_mask:
         assert torch.allclose(
             expected_sampling_metadata.allowed_token_ids_mask,
@@ -328,7 +328,7 @@ def test_swap_states_in_input_batch(device: str, batch_size: int,
         max_model_len=1024,
         max_num_batched_tokens=1024,
         device=torch.device(device),
-        pin_memory=is_pin_memory_available(),
+        pin_memory=False,
         vocab_size=1024,
         block_sizes=[1],
     )
@@ -337,7 +337,7 @@ def test_swap_states_in_input_batch(device: str, batch_size: int,
         max_model_len=1024,
         max_num_batched_tokens=1024,
         device=torch.device(device),
-        pin_memory=is_pin_memory_available(),
+        pin_memory=False,
         vocab_size=1024,
         block_sizes=[1],
     )