Upgrade to 0.11.1 newest vllm commit (#3982)
### What this PR does / why we need it?
adapt vllm-ascend main branch with vllm releases/v0.11.1
fix `forward context not set` in test_vlm.py caused by:
https://github.com/vllm-project/vllm/pull/23207
fix failing imports of `cdiv` and `round_down` caused by:
https://github.com/vllm-project/vllm/pull/27188
fix failing import of `init_cached_hf_modules` caused by:
https://github.com/vllm-project/vllm/pull/27567
adapt triton kernel `fused_recurrent_gated_delta_rule_fwd_kernel` caused
by: https://github.com/vllm-project/vllm/pull/27654
- remove unused code in sigmoid_gating.py
- `class FusedRecurrentFunction`, `fused_recurrent_gated_delta_rule`,
`fused_recurrent_gated_delta_rule_fwd`
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
CI
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
Signed-off-by: 22dimensions <waitingwind@foxmail.com>
This commit is contained in:
@@ -670,6 +670,8 @@ class TorchairDeepseekV2MLAAttention(DeepseekV2MLAAttention):
             if self.q_lora_rank is not None else None,
+            q_proj=self.q_proj
+            if self.q_lora_rank is None else self.q_b_proj,
             q_b_proj=self.q_b_proj
             if self.q_lora_rank is not None else None,
             kv_a_proj_with_mqa=self.kv_a_proj_with_mqa,
             kv_a_layernorm=self.kv_a_layernorm,
             kv_b_proj=self.kv_b_proj,
||||
@@ -26,7 +26,13 @@ from vllm.attention.backends.abstract import (AttentionImpl, AttentionLayer,
                                               AttentionType)
 from vllm.attention.backends.utils import PAD_SLOT_ID
 from vllm.config import VllmConfig
-from vllm.utils import cdiv
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import cdiv
+else:
+    from vllm.utils.math_utils import cdiv

 from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend,
                                                 AscendAttentionMetadataBuilder,
||||
@@ -13,7 +13,13 @@ from vllm.config import VllmConfig, get_current_vllm_config
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.linear import (LinearBase,
                                                UnquantizedLinearMethod)
-from vllm.utils import cdiv, round_down
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import cdiv, round_down
+else:
+    from vllm.utils.math_utils import cdiv, round_down

 import vllm_ascend.envs as envs_ascend
 from vllm_ascend.ascend_config import get_ascend_config
||||
@@ -14,7 +14,13 @@ from vllm.config import VllmConfig, get_current_vllm_config
 from vllm.distributed import get_tensor_model_parallel_world_size, get_tp_group
 from vllm.model_executor.layers.linear import (LinearBase,
                                                UnquantizedLinearMethod)
-from vllm.utils import cdiv, round_down
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import cdiv, round_down
+else:
+    from vllm.utils.math_utils import cdiv, round_down

 import vllm_ascend.envs as envs_ascend
 from vllm_ascend.ascend_config import get_ascend_config
||||
Reference in New Issue
Block a user