[CI] Update vllm version to 20250922(5aeb925) (#3091)

### What this PR does / why we need it? This PR bumps the vLLM commit hash to 5aeb925452 and fixes the following issues: 1. https://github.com/vllm-project/vllm/pull/25345 has removed the v0 metadata 2. https://github.com/vllm-project/vllm/pull/25332 3. https://github.com/vllm-project/vllm/pull/25334 4. https://github.com/vllm-project/vllm/pull/23558 — note that this vLLM commit updates the model registration logic, which now checks that every registered model is located under the `vllm.model_executor.models` path; this breaks our custom registration of the deepseek_v3 model (it doesn't exist in the vLLM model path), so I moved the deepseek_v3 model registration to deepseek_v2 as a temporary workaround. ### How was this patch tested? - vLLM version: v0.10.2 - vLLM main: 9607d5eb44 --------- Signed-off-by: wangli <wangli858794774@gmail.com>
2025-09-22 22:18:13 +08:00
parent 1c9f0fe26f
commit 02f89d166f
21 changed files with 58 additions and 92 deletions
--- a/vllm_ascend/models/deepseek_mtp.py
+++ b/vllm_ascend/models/deepseek_mtp.py
@@ -28,7 +28,6 @@ from vllm.config import (CacheConfig, ModelConfig, VllmConfig,
 from vllm.model_executor.layers.layernorm import RMSNorm
 from vllm.model_executor.layers.logits_processor import LogitsProcessor
 from vllm.model_executor.layers.quantization import QuantizationConfig
-from vllm.model_executor.layers.sampler import get_sampler
 from vllm.model_executor.layers.vocab_parallel_embedding import (
    ParallelLMHead, VocabParallelEmbedding)
 from vllm.model_executor.models.deepseek_mtp import (
@@ -36,7 +35,6 @@ from vllm.model_executor.models.deepseek_mtp import (
    SharedHead)
 from vllm.model_executor.models.deepseek_v2 import DeepseekV2DecoderLayer
 from vllm.model_executor.models.utils import maybe_prefix
-from vllm.model_executor.sampling_metadata import SamplingMetadata
 from vllm.sequence import IntermediateTensors


@@ -168,7 +166,7 @@ class CustomDeepSeekMultiTokenPredictor(DeepSeekMultiTokenPredictor):
    def compute_logits(
        self,
        hidden_states: torch.Tensor,
-        sampling_metadata: SamplingMetadata,
+        sampling_metadata,  # type: ignore
        spec_step_idx: int = 0,
    ) -> torch.Tensor:
        current_step_idx = (spec_step_idx % self.num_mtp_layers)
@@ -188,8 +186,6 @@ class CustomDeepSeekMTP(DeepSeekMTP):
                                                       prefix=maybe_prefix(
                                                           prefix, "model"))

-        self.sampler = get_sampler()
-
    def forward(
        self,
        input_ids: torch.Tensor,
@@ -204,4 +200,4 @@ class CustomDeepSeekMTP(DeepSeekMTP):
        hidden_states = self.model(input_ids, positions, kv_caches,
                                   attn_metadata, previous_hidden_states,
                                   inputs_embeds, spec_step_idx)
-        return hidden_states
+        return hidden_states