Upgrade to 0.11.1 newest vllm commit (#3762)

### What this PR does / why we need it?

Upgrade to the newest vLLM main commit, c9461e05a4.

- Fix the ```spec decode rejection sampler```, broken by
https://github.com/vllm-project/vllm/pull/26060
- Fix several ```import``` paths, broken by
https://github.com/vllm-project/vllm/pull/27374
- Fix ```scheduler_config.send_delta_data```, broken by
https://github.com/vllm-project/vllm-ascend/pull/3719
- Fix ```init_with_cudagraph_sizes```, broken by
https://github.com/vllm-project/vllm/pull/26016
- Fix the ```vl model``` handling of replacing PatchEmbed's conv3d with a linear layer,
broken by https://github.com/vllm-project/vllm/pull/27418

### Does this PR introduce _any_ user-facing change?
N/A

### How was this patch tested?
CI passed with newly added and existing tests.


- vLLM version: v0.11.0rc3
- vLLM main:
c9461e05a4

---------

Signed-off-by: Icey <1790571317@qq.com>
Commit a7450db1bd (parent f846bd20e4), committed via GitHub by Icey on 2025-10-28 14:55:03 +08:00.
12 changed files with 175 additions and 51 deletions.


@@ -12,7 +12,6 @@ from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase
 from vllm.model_executor.model_loader import get_model
 from vllm.model_executor.models import supports_multimodal
 from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM
-from vllm.utils import is_pin_memory_available
 from vllm.v1.core.sched.output import SchedulerOutput
 from vllm.v1.sample.metadata import SamplingMetadata
 from vllm.v1.spec_decode.metadata import SpecDecodeMetadata
@@ -27,8 +26,10 @@ from vllm_ascend.utils import vllm_version_is
 if vllm_version_is("0.11.0"):
     from vllm.config import CompilationLevel
+    from vllm.utils import is_pin_memory_available
 else:
     from vllm.config import CompilationMode
+    from vllm.utils.platform_utils import is_pin_memory_available

 PADDING_SLOT_ID = -1
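The hunk above gates the import location on the installed vLLM version, since `is_pin_memory_available` moved from `vllm.utils` to `vllm.utils.platform_utils` in vllm-project/vllm#27374. A minimal, self-contained sketch of the same idea is shown below; it uses a try/except fallback instead of `vllm_version_is` so it runs without vLLM installed, and the final stub is purely illustrative:

```python
def resolve_is_pin_memory_available():
    """Locate is_pin_memory_available across vLLM versions.

    Tries the post-0.11.0 module path first, then the 0.11.0 path,
    and finally falls back to a stub (illustration only) when vLLM
    is not installed at all.
    """
    try:
        # New location after vllm-project/vllm#27374
        from vllm.utils.platform_utils import is_pin_memory_available
    except ImportError:
        try:
            # Old location on vLLM 0.11.0
            from vllm.utils import is_pin_memory_available
        except ImportError:
            # Hypothetical stub so this sketch runs without vLLM
            def is_pin_memory_available():
                return False
    return is_pin_memory_available


# Resolve once at import time, mirroring the module-level gating in the diff
is_pin_memory_available = resolve_is_pin_memory_available()
```

In the actual patch, the branch is chosen with `vllm_version_is("0.11.0")` rather than exception handling, which keeps the behavior explicit per supported vLLM release.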