[Version] Drop 0.16.0 support (#7153)
### What this PR does / why we need it?
Drop 0.16.0 support in main (a sketch of the removed version gate follows below).
- Fix the eagle proposer breakage introduced by
https://github.com/vllm-project/vllm/pull/34552, mainly by using the
draft attention group to initialize the attention metadata builder.
- Fix the `ModelRunner` has no attribute `cudagraph_capture_sizes` error,
a bug in vLLM v0.17.0 that was fixed by a later PR:
https://github.com/vllm-project/vllm/pull/30515
- vLLM version: v0.16.0
- vLLM main: 4034c3d32e
---------
Signed-off-by: MengqingCao <cmq0113@163.com>
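For context, here is a minimal sketch of the version-gating pattern this PR removes. The `vllm_version_is` helper really lives in `vllm_ascend.utils`; its body below, and the use of `importlib.metadata`, are assumptions for illustration only.

```python
# Minimal sketch of the version gate dropped by this PR; the body of
# vllm_version_is is an assumption, not the vllm_ascend implementation.
from importlib.metadata import version


def vllm_version_is(target: str) -> bool:
    # True when the installed vLLM release matches the pinned version string.
    return version("vllm") == target


# Before this PR, newer-only code paths were guarded like this:
if not vllm_version_is("0.16.0"):
    # vLLM >= 0.17.0 path, e.g. eagerly creating the MoE runner.
    pass
```

With 0.16.0 support dropped, every such guard collapses to the unguarded newer path, which is exactly what the diff below does.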
```diff
@@ -26,7 +26,6 @@ from vllm_ascend.ascend_forward_context import _EXTRA_CTX, MoECommType
 from vllm_ascend.ops.fused_moe.experts_selector import zero_experts_compute
 from vllm_ascend.ops.fused_moe.moe_comm_method import FusedExpertsResult, _MoECommMethods
 from vllm_ascend.quantization.methods.base import QuantType
-from vllm_ascend.utils import vllm_version_is
 
 from .experts_selector import select_experts
 from .moe_comm_method import AllGatherCommImpl310
@@ -152,25 +151,22 @@ class AscendFusedMoE310(FusedMoE):
         self.quant_type = self.get_quant_type()
 
         _MoECommMethods[MoECommType.ALLGATHER] = AllGatherCommImpl310(self.moe_config)
-        if not vllm_version_is("0.16.0"):
-            self.runner = self._init_runner()
+        self.runner = self._init_runner()
 
-    if not vllm_version_is("0.16.0"):
-
-        def _init_runner(self):
-            from vllm_ascend.ops.fused_moe.fused_moe import AscendMoERunner
-
-            return AscendMoERunner(
-                layer=self,
-                moe_config=self.moe_config,
-                router=self.router,
-                routed_input_transform=self._routed_input_transform,
-                gate=self.gate,
-                shared_experts=self.shared_experts,
-                quant_method=self.quant_method,
-                reduce_results=self.reduce_results,
-                enable_dbo=self.vllm_config.parallel_config.enable_dbo,
-            )
+    def _init_runner(self):
+        from vllm_ascend.ops.fused_moe.fused_moe import AscendMoERunner
+
+        return AscendMoERunner(
+            layer=self,
+            moe_config=self.moe_config,
+            router=self.router,
+            routed_input_transform=self._routed_input_transform,
+            gate=self.gate,
+            shared_experts=self.shared_experts,
+            quant_method=self.quant_method,
+            reduce_results=self.reduce_results,
+            enable_dbo=self.vllm_config.parallel_config.enable_dbo,
+        )
 
     def init_experts_map(self, moe_config):
         """
```
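For readability, here is the post-merge shape of the touched region, reassembled from the `+` lines of the diff above; the class skeleton, the `__init__` signature, and the `...` elisions are assumptions, not the full source.

```python
# Post-merge state of AscendFusedMoE310, assembled from the '+' lines in
# the diff above. The __init__ signature and the "..." elisions are
# assumptions; only the shown statements come from the diff.
class AscendFusedMoE310(FusedMoE):
    def __init__(self, *args, **kwargs):
        ...
        self.quant_type = self.get_quant_type()

        _MoECommMethods[MoECommType.ALLGATHER] = AllGatherCommImpl310(self.moe_config)
        # Created unconditionally now that the 0.16.0 guard is gone.
        self.runner = self._init_runner()

    def _init_runner(self):
        # Lazy import, kept from the original code.
        from vllm_ascend.ops.fused_moe.fused_moe import AscendMoERunner

        return AscendMoERunner(
            layer=self,
            moe_config=self.moe_config,
            router=self.router,
            routed_input_transform=self._routed_input_transform,
            gate=self.gate,
            shared_experts=self.shared_experts,
            quant_method=self.quant_method,
            reduce_results=self.reduce_results,
            enable_dbo=self.vllm_config.parallel_config.enable_dbo,
        )
```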