Drop vLLM 0.13.0 support (#6069)

### What this PR does / why we need it?
Drop vLLM 0.13.0 support, upgrade to 0.14.0

- vLLM version: v0.13.0
- vLLM main:
d68209402d

---------

Signed-off-by: hfadzxy <starmoon_zhang@163.com>
This commit is contained in:
zhangxinyuehfad
2026-01-23 09:45:08 +08:00
committed by GitHub
parent 27a513b672
commit 819a4459ce
39 changed files with 86 additions and 272 deletions

View File

@@ -27,5 +27,5 @@ if os.getenv("DYNAMIC_EPLB", "false").lower() in ("true", "1") or os.getenv(
"EXPERT_MAP_RECORD", "false") == "true":
import vllm_ascend.patch.platform.patch_multiproc_executor # noqa
if envs.VLLM_ASCEND_BALANCE_SCHEDULING and vllm_version_is('0.13.0'):
if envs.VLLM_ASCEND_BALANCE_SCHEDULING and vllm_version_is('0.14.0'):
import vllm_ascend.patch.platform.patch_balance_schedule # noqa

View File

@@ -28,6 +28,7 @@ from vllm.model_executor.layers.mamba.ops.causal_conv1d import (
from vllm.model_executor.models.qwen3_next import (Qwen3NextGatedDeltaNet,
fused_gdn_gating)
from vllm.triton_utils import triton
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadata
from vllm_ascend.ops.triton.fla.fused_qkvzba_split_reshape import \
@@ -35,14 +36,6 @@ from vllm_ascend.ops.triton.fla.fused_qkvzba_split_reshape import \
from vllm_ascend.ops.triton.fla.sigmoid_gating import \
fused_sigmoid_gating_delta_rule_update
from vllm_ascend.ops.triton.fused_gdn_gating import fused_gdn_gating_patch
from vllm_ascend.utils import vllm_version_is
# isort: off
if vllm_version_is('0.13.0'):
from vllm.attention.backends.abstract import AttentionMetadata # type: ignore
else:
from vllm.v1.attention.backend import AttentionMetadata # type: ignore
# isort: on
class AscendQwen3Next_GatedDeltaNet(nn.Module, MambaBase):