Upgrade to new vllm commit (#3719)

### What this PR does / why we need it? Upgrade to new vllm commit: c9461e05a4 - Fix many imports, caused by https://github.com/vllm-project/vllm/pull/26908 - Fix import ```sha256```, caused by https://github.com/vllm-project/vllm/pull/27169 - Remove ```SchedulerConfig.send_delta_data```, caused by https://github.com/vllm-project/vllm/pull/27142 - Fix ```FusedMoE``` because of dual stream execution, caused by https://github.com/vllm-project/vllm/pull/26440 ### Does this PR introduce _any_ user-facing change? N/A ### How was this patch tested? CI passed with new added/existing test. - vLLM version: v0.11.0rc3 - vLLM main: 17c540a993 --------- Signed-off-by: MengqingCao <cmq0113@163.com> Signed-off-by: Icey <1790571317@qq.com> Co-authored-by: MengqingCao <cmq0113@163.com>
2025-10-25 15:36:32 +08:00
parent 226f832c0b
commit d9cdc65854
37 changed files with 229 additions and 71 deletions
--- a/tests/ut/core/test_schedule_config.py
+++ b/tests/ut/core/test_schedule_config.py
@@ -78,21 +78,6 @@ class TestAscendSchedulerConfig(TestBase):
            str(context.exception),
        )

-    def test_not_implemented_send_delta_data(self):
-        with self.assertRaises(NotImplementedError) as context:
-            AscendSchedulerConfig.initialize_from_config(
-                self.basic_scheduler_config,
-                AscendSchedulerConfig(
-                    send_delta_data=True,
-                    max_num_batched_tokens=2048,
-                    max_model_len=2048,
-                ),
-            )
-        self.assertIn(
-            "currently AscendScheduler doesn't support send_delta_data",
-            str(context.exception),
-        )
-
    def test_no_override(self):
        ascend_config = AscendSchedulerConfig.initialize_from_config(
            self.basic_scheduler_config, {})
--- a/tests/ut/core/test_scheduler.py
+++ b/tests/ut/core/test_scheduler.py
@@ -9,7 +9,6 @@ from vllm.config import (CacheConfig, KVTransferConfig, ModelConfig,
 from vllm.multimodal.inputs import (MultiModalFeatureSpec,
                                    MultiModalKwargsItem, PlaceholderRange)
 from vllm.sampling_params import SamplingParams
-from vllm.utils import sha256
 from vllm.v1.core.kv_cache_utils import (get_request_block_hasher,
                                         init_none_hash)
 from vllm.v1.core.sched.output import SchedulerOutput
@@ -24,6 +23,11 @@ from vllm_ascend.core.scheduler import AscendScheduler
 from vllm_ascend.core.scheduler_dynamic_batch import SchedulerDynamicBatch
 from vllm_ascend.utils import vllm_version_is

+if vllm_version_is("0.11.0"):
+    from vllm.utils import sha256
+else:
+    from vllm.utils.hashing import sha256
+
 EOS_TOKEN_ID = 50256
 MODEL = "Qwen3-0.6B"
 ENABLE_PREFIX_CACHING = None