adapt to main2main for model runner v2 (#7578)
### What this PR does / why we need it?
This PR adapts model runner v2 to the newest commit of the vLLM main
branch. For details, please refer to
https://github.com/vllm-project/vllm-ascend/issues/5208
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
- vLLM version: v0.18.0
- vLLM main: ed359c497a
---------
Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
This commit is contained in:
@@ -19,6 +19,7 @@ from vllm.triton_utils import HAS_TRITON
|
||||
|
||||
if HAS_TRITON:
|
||||
import vllm_ascend.patch.worker.patch_triton
|
||||
import vllm_ascend.patch.worker.patch_v2.patch_triton # noqa
|
||||
|
||||
|
||||
# isort: off
|
||||
@@ -36,8 +37,8 @@ import vllm_ascend.patch.worker.patch_qwen3_next # noqa
|
||||
import vllm_ascend.patch.worker.patch_qwen3_next_mtp # noqa
|
||||
import vllm_ascend.patch.worker.patch_qwen3_5 # noqa
|
||||
import vllm_ascend.patch.worker.patch_rejection_sampler # noqa
|
||||
import vllm_ascend.patch.worker.patch_v2_eagle # noqa
|
||||
import vllm_ascend.patch.worker.patch_v2_uva # noqa
|
||||
import vllm_ascend.patch.worker.patch_v2.patch_eagle # noqa
|
||||
import vllm_ascend.patch.worker.patch_v2.patch_uva # noqa
|
||||
import vllm_ascend.patch.worker.patch_huanyuan_vl # noqa
|
||||
import vllm_ascend.patch.worker.patch_routed_experts_capturer # noqa
|
||||
import vllm_ascend.patch.worker.patch_npugraph_ex_triton # noqa
|
||||
@@ -45,3 +46,6 @@ import vllm_ascend.patch.worker.patch_kimi_k25 # noqa
|
||||
import vllm_ascend.patch.worker.patch_draft_quarot # noqa
|
||||
import vllm_ascend.patch.worker.patch_cudagraph # noqa
|
||||
import vllm_ascend.patch.worker.patch_deepseek_mtp # noqa
|
||||
import vllm_ascend.patch.worker.patch_v2.patch_input_batch # noqa
|
||||
import vllm_ascend.patch.worker.patch_v2.patch_model_state # noqa
|
||||
import vllm_ascend.patch.worker.patch_v2.patch_block_table # noqa
|
||||
|
||||
Reference in New Issue
Block a user