[CI] fix ci (#2464)
### What this PR does / why we need it?
1. use actions/checkout@v5 instead of v4
2. remove the DBO test case for now; it is currently broken and will be refactored later
3. make vllm-ascend compatible with vllm v0.10.1.1 and add CI for it
4. fix sampler API changes introduced by https://github.com/vllm-project/vllm/pull/22387
5. fix qwen3 moe config changes introduced by https://github.com/vllm-project/vllm/pull/20562 (see the sketch after this list)
6. fix kvcache block changes introduced by https://github.com/vllm-project/vllm/pull/23262
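As the diff in this commit shows, the compatibility work in items 3 and 5 mostly amounts to gating config access on the installed vLLM version. A minimal sketch of that pattern, where `resolve_num_redundant_experts` is a purely hypothetical helper name (the actual change lives inline in `CustomQwen3MoeModel.__init__`, shown in the diff below):

```python
# Minimal sketch of the version-gated config access used in this patch.
# `resolve_num_redundant_experts` is a hypothetical name for illustration.
from vllm_ascend.utils import vllm_version_is


def resolve_num_redundant_experts(parallel_config) -> int:
    if vllm_version_is("0.10.1.1"):
        # vLLM 0.10.1.1 keeps the field directly on the parallel config.
        return parallel_config.num_redundant_experts
    # Newer vLLM nests the field under the EPLB config.
    return parallel_config.eplb_config.num_redundant_experts
```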
### Does this PR introduce _any_ user-facing change?
N/A
### How was this patch tested?
CI passed with the existing tests.
- vLLM version: v0.10.0
- vLLM main: 0c6e40bbaa
---------
Signed-off-by: MengqingCao <cmq0113@163.com>
@@ -50,6 +50,7 @@ from vllm.sequence import IntermediateTensors
 from vllm_ascend.ops.fused_moe import AscendFusedMoE
 from vllm_ascend.ops.sequence_parallel import (MetadataForPadding,
                                                init_metadata_for_sp)
+from vllm_ascend.utils import vllm_version_is
 
 
 class CustomSparseMoeBlock(Qwen3MoeSparseMoeBlock):
@@ -253,7 +254,11 @@ class CustomQwen3MoeModel(Qwen3MoeModel):
         quant_config = vllm_config.quant_config
 
         parallel_config = vllm_config.parallel_config
-        self.num_redundant_experts = parallel_config.num_redundant_experts
+        if vllm_version_is("0.10.1.1"):
+            self.num_redundant_experts = parallel_config.num_redundant_experts
+        else:
+            eplb_config = parallel_config.eplb_config
+            self.num_redundant_experts = eplb_config.num_redundant_experts
         self.padding_idx = config.pad_token_id
         self.vocab_size = config.vocab_size
         self.config = config
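For readers unfamiliar with the gate above: `vllm_version_is` comes from `vllm_ascend.utils`, and its body is not part of this diff. A rough standalone equivalent, written here purely as an assumption about its behavior, would compare the installed vLLM release string against the target:

```python
# Hypothetical stand-in for vllm_ascend.utils.vllm_version_is; the real
# helper may differ (e.g. it could read vllm.__version__ instead).
from importlib.metadata import version


def vllm_version_is(target: str) -> bool:
    # True when the installed vLLM distribution matches the target
    # release string, e.g. vllm_version_is("0.10.1.1").
    return version("vllm") == target
```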