Upgrade to new vllm commit (#3719)
### What this PR does / why we need it?

Upgrade to new vllm commit: c9461e05a4

- Fix many imports, caused by https://github.com/vllm-project/vllm/pull/26908
- Fix import ```sha256```, caused by https://github.com/vllm-project/vllm/pull/27169
- Remove ```SchedulerConfig.send_delta_data```, caused by https://github.com/vllm-project/vllm/pull/27142
- Fix ```FusedMoE``` because of dual stream execution, caused by https://github.com/vllm-project/vllm/pull/26440

### Does this PR introduce _any_ user-facing change?

N/A

### How was this patch tested?

CI passed with new added/existing test.

- vLLM version: v0.11.0rc3
- vLLM main: 17c540a993

---------

Signed-off-by: MengqingCao <cmq0113@163.com>
Signed-off-by: Icey <1790571317@qq.com>
Co-authored-by: MengqingCao <cmq0113@163.com>
This commit is contained in:
@@ -5,6 +5,7 @@ import torch
|
||||
from vllm.config import CacheConfig, ModelConfig, ParallelConfig, VllmConfig
|
||||
|
||||
from tests.ut.base import TestBase
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
|
||||
class TestNPUWorker(TestBase):
|
||||
@@ -178,15 +179,26 @@ class TestNPUWorker(TestBase):
|
||||
# Create NPUWorker instance
|
||||
from vllm_ascend.worker.worker_v1 import NPUWorker
|
||||
|
||||
with patch("vllm.utils.STR_DTYPE_TO_TORCH_DTYPE",
|
||||
{"float32": torch.float32}):
|
||||
worker = NPUWorker(
|
||||
vllm_config=self.vllm_config_mock,
|
||||
local_rank=self.local_rank,
|
||||
rank=self.rank,
|
||||
distributed_init_method=self.distributed_init_method,
|
||||
is_driver_worker=self.is_driver_worker,
|
||||
)
|
||||
if vllm_version_is("0.11.0"):
|
||||
with patch("vllm.utils.STR_DTYPE_TO_TORCH_DTYPE",
|
||||
{"float32": torch.float32}):
|
||||
worker = NPUWorker(
|
||||
vllm_config=self.vllm_config_mock,
|
||||
local_rank=self.local_rank,
|
||||
rank=self.rank,
|
||||
distributed_init_method=self.distributed_init_method,
|
||||
is_driver_worker=self.is_driver_worker,
|
||||
)
|
||||
else:
|
||||
with patch("vllm.utils.torch_utils.STR_DTYPE_TO_TORCH_DTYPE",
|
||||
{"float32": torch.float32}):
|
||||
worker = NPUWorker(
|
||||
vllm_config=self.vllm_config_mock,
|
||||
local_rank=self.local_rank,
|
||||
rank=self.rank,
|
||||
distributed_init_method=self.distributed_init_method,
|
||||
is_driver_worker=self.is_driver_worker,
|
||||
)
|
||||
|
||||
# Verify cache_dtype is set to custom value
|
||||
self.assertEqual(worker.cache_dtype, torch.float32)
|
||||
|
||||
Reference in New Issue
Block a user