Upgrade to 0.11.1 newest vllm commit (#3982)
### What this PR does / why we need it?
adapt vllm-ascend main branch with vllm releases/v0.11.1
fix `forward context not set` in test_vlm.py caused by:
https://github.com/vllm-project/vllm/pull/23207
fix failing imports of `cdiv` and `round_down` caused by:
https://github.com/vllm-project/vllm/pull/27188
fix failing import of `init_cached_hf_modules` caused by:
https://github.com/vllm-project/vllm/pull/27567
adapt triton kernel `fused_recurrent_gated_delta_rule_fwd_kernel` caused
by: https://github.com/vllm-project/vllm/pull/27654
- remove unused code in sigmoid_gating.py
- `class FusedRecurrentFunction`, `fused_recurrent_gated_delta_rule`,
`fused_recurrent_gated_delta_rule_fwd`
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
CI
- vLLM version: v0.11.0
- vLLM main:
83f478bb19
Signed-off-by: 22dimensions <waitingwind@foxmail.com>
This commit is contained in:
@@ -670,6 +670,8 @@ class TorchairDeepseekV2MLAAttention(DeepseekV2MLAAttention):
             if self.q_lora_rank is not None else None,
+            q_proj=self.q_proj
+            if self.q_lora_rank is None else self.q_b_proj,
             q_b_proj=self.q_b_proj
             if self.q_lora_rank is not None else None,
             kv_a_proj_with_mqa=self.kv_a_proj_with_mqa,
             kv_a_layernorm=self.kv_a_layernorm,
             kv_b_proj=self.kv_b_proj,
||||
@@ -26,7 +26,13 @@ from vllm.attention.backends.abstract import (AttentionImpl, AttentionLayer,
                                               AttentionType)
 from vllm.attention.backends.utils import PAD_SLOT_ID
 from vllm.config import VllmConfig
-from vllm.utils import cdiv
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import cdiv
+else:
+    from vllm.utils.math_utils import cdiv

 from vllm_ascend.attention.attention_v1 import (AscendAttentionBackend,
                                                 AscendAttentionMetadataBuilder,
||||
@@ -13,7 +13,13 @@ from vllm.config import VllmConfig, get_current_vllm_config
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.linear import (LinearBase,
                                                UnquantizedLinearMethod)
-from vllm.utils import cdiv, round_down
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import cdiv, round_down
+else:
+    from vllm.utils.math_utils import cdiv, round_down

 import vllm_ascend.envs as envs_ascend
 from vllm_ascend.ascend_config import get_ascend_config
||||
@@ -14,7 +14,13 @@ from vllm.config import VllmConfig, get_current_vllm_config
 from vllm.distributed import get_tensor_model_parallel_world_size, get_tp_group
 from vllm.model_executor.layers.linear import (LinearBase,
                                                UnquantizedLinearMethod)
-from vllm.utils import cdiv, round_down
+from vllm_ascend.utils import vllm_version_is
+
+if vllm_version_is("0.11.0"):
+    from vllm.utils import cdiv, round_down
+else:
+    from vllm.utils.math_utils import cdiv, round_down

 import vllm_ascend.envs as envs_ascend
 from vllm_ascend.ascend_config import get_ascend_config
||||
Reference in New Issue
Block a user