[CI] Update vllm version to 20250922(5aeb925) (#3091)
### What this PR does / why we need it? This pr bump vllm commit hash to5aeb925452fix issues: 1. https://github.com/vllm-project/vllm/pull/25345 has remove v0 metadata 2. https://github.com/vllm-project/vllm/pull/25332 3. https://github.com/vllm-project/vllm/pull/25334 4. https://github.com/vllm-project/vllm/pull/23558, note that this vllm commit update the model register logic, which will check all the model registered have the `vllm.model_executor.models` path , which breaks our custom registration of the deepseek_v3 model (it doesn't exist in the vllm model path). so I move deepseek_v3 model registy to deepseek_v2 to solve temporary ### How was this patch tested? - vLLM version: v0.10.2 - vLLM main:9607d5eb44--------- Signed-off-by: wangli <wangli858794774@gmail.com>
This commit is contained in:
@@ -45,7 +45,6 @@ from vllm.model_executor.layers.linear import (LinearBase,
|
||||
from vllm.model_executor.layers.logits_processor import LogitsProcessor
|
||||
from vllm.model_executor.layers.quantization import QuantizationConfig
|
||||
from vllm.model_executor.layers.rotary_embedding import get_rope
|
||||
from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler
|
||||
from vllm.model_executor.layers.vocab_parallel_embedding import (
|
||||
ParallelLMHead, VocabParallelEmbedding)
|
||||
from vllm.model_executor.model_loader.weight_utils import default_weight_loader
|
||||
@@ -53,9 +52,9 @@ from vllm.model_executor.models.interfaces import SupportsPP
|
||||
from vllm.model_executor.models.utils import (
|
||||
extract_layer_index, is_pp_missing_parameter,
|
||||
make_empty_intermediate_tensors_factory, make_layers, maybe_prefix)
|
||||
from vllm.model_executor.sampling_metadata import SamplingMetadata
|
||||
from vllm.model_executor.utils import set_weight_attrs
|
||||
from vllm.sequence import IntermediateTensors
|
||||
from vllm.v1.sample.sampler import Sampler
|
||||
|
||||
from vllm_ascend.ascend_config import get_ascend_config
|
||||
from vllm_ascend.utils import ACL_FORMAT_FRACTAL_NZ, is_310p
|
||||
@@ -913,7 +912,7 @@ class PanguProMoEForCausalLM(nn.Module, SupportsPP):
|
||||
if self.config.tie_word_embeddings:
|
||||
self.lm_head.weight = self.model.embed_tokens.weight
|
||||
self.logits_processor = LogitsProcessor(config.vocab_size)
|
||||
self.sampler = get_sampler()
|
||||
self.sampler = Sampler()
|
||||
self.make_empty_intermediate_tensors = (
|
||||
self.model.make_empty_intermediate_tensors)
|
||||
|
||||
@@ -935,19 +934,19 @@ class PanguProMoEForCausalLM(nn.Module, SupportsPP):
|
||||
return hidden_states
|
||||
|
||||
def compute_logits(
|
||||
self,
|
||||
hidden_states: torch.Tensor,
|
||||
sampling_metadata: SamplingMetadata,
|
||||
self,
|
||||
hidden_states: torch.Tensor,
|
||||
sampling_metadata, # type: ignore
|
||||
) -> Optional[torch.Tensor]:
|
||||
logits = self.logits_processor(self.lm_head, hidden_states,
|
||||
sampling_metadata)
|
||||
return logits
|
||||
|
||||
def sample(
|
||||
self,
|
||||
logits: Optional[torch.Tensor],
|
||||
sampling_metadata: SamplingMetadata,
|
||||
) -> Optional[SamplerOutput]:
|
||||
self,
|
||||
logits: Optional[torch.Tensor],
|
||||
sampling_metadata, # type: ignore
|
||||
):
|
||||
next_tokens = self.sampler(logits, sampling_metadata)
|
||||
return next_tokens
|
||||
|
||||
|
||||
Reference in New Issue
Block a user