upgrade vLLM to 0.12.0 tag (#4647)

Upgrade vLLM to v0.12.0 tag

- vLLM version: 86e178f7c4d8c3b0eaf3c8e3f810a83f63b90e24
- vLLM main: 86e178f7c4

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-12-03 23:43:05 +08:00
committed by GitHub
parent 26e8e58cea
commit 3f4c0ea0a0
22 changed files with 97 additions and 47 deletions

View File

@@ -315,8 +315,8 @@ class CustomQwen3MoeDecoderLayer(Qwen3MoeDecoderLayer):
eps=config.rms_norm_eps)
        self.enable_sequence_parallelism = (
-            vllm_config.compilation_config.pass_config.
-            enable_sequence_parallelism if vllm_config is not None else False)
+            vllm_config.compilation_config.pass_config.enable_sp
+            if vllm_config is not None else False)
def forward(
self,
@@ -488,7 +488,7 @@ class CustomQwen3MoeForCausalLM(Qwen3MoeForCausalLM):
self.make_empty_intermediate_tensors = (
self.model.make_empty_intermediate_tensors)
-        self.enable_sequence_parallelism = vllm_config.compilation_config.pass_config.enable_sequence_parallelism
+        self.enable_sequence_parallelism = vllm_config.compilation_config.pass_config.enable_sp
# Set MoE hyperparameters
self.expert_weights: list[torch.Tensor] = []