[Misc] Remove VLLM_USE_V1 usage in code (#1764)

We plan to remove V0 code from this version. The first step is to delete
v0 usage.

Related: https://github.com/vllm-project/vllm-ascend/issues/1620

- vLLM version: v0.9.2
- vLLM main:
61e20828da

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
This commit is contained in:
wangxiyuan
2025-07-15 11:52:16 +08:00
committed by GitHub
parent 494b0f474f
commit 7bdada58eb
6 changed files with 100 additions and 217 deletions

View File

@@ -16,7 +16,6 @@
#
import gc
import os
from datetime import timedelta
from typing import TYPE_CHECKING, Optional, Tuple
@@ -117,6 +116,8 @@ class NPUPlatform(Platform):
@classmethod
def check_and_update_config(cls, vllm_config: VllmConfig) -> None:
if not envs.VLLM_USE_V1:
raise ValueError("vLLM Ascend does not support V0 engine")
# initialize ascend config from vllm additional_config
ascend_config = init_ascend_config(vllm_config)
@@ -180,18 +181,7 @@ class NPUPlatform(Platform):
update_aclgraph_sizes(vllm_config)
if parallel_config and parallel_config.worker_cls == "auto":
if envs.VLLM_USE_V1:
parallel_config.worker_cls = "vllm_ascend.worker.worker_v1.NPUWorker"
elif vllm_config.speculative_config:
# NOTE: We set this var to `1` in vllm-ascend to avoid segment
# fault when using spec decode with V0 engine.
os.environ["ACL_OP_INIT_MODE"] = "1"
parallel_config.worker_cls = "vllm.spec_decode.spec_decode_worker.create_spec_worker"
parallel_config.sd_worker_cls = "vllm_ascend.worker.worker.NPUWorker"
elif vllm_config.scheduler_config.is_multi_step:
parallel_config.worker_cls = "vllm_ascend.worker.multi_step_worker.MultiStepWorker"
else:
parallel_config.worker_cls = "vllm_ascend.worker.worker.NPUWorker"
parallel_config.worker_cls = "vllm_ascend.worker.worker_v1.NPUWorker"
if cache_config:
if cache_config.block_size is None:
@@ -202,20 +192,18 @@ class NPUPlatform(Platform):
)
cache_config.block_size = 128
if envs.VLLM_USE_V1:
# Activate custom ops for v1, except on 310P
if not is_310p():
compilation_config.custom_ops = ["all"]
# Activate custom ops for v1, except on 310P
if not is_310p():
compilation_config.custom_ops = ["all"]
# If ascend_scheduler_config is enabled,
# extents original scheduler_config to use AscendScheduler.
if ascend_config.ascend_scheduler_config.enabled:
from vllm_ascend.core.schedule_config import \
AscendSchedulerConfig
ascend_scheduler_config = AscendSchedulerConfig.initialize_from_config(
vllm_config.scheduler_config,
ascend_config.ascend_scheduler_config)
vllm_config.scheduler_config = ascend_scheduler_config
# If ascend_scheduler_config is enabled,
# extents original scheduler_config to use AscendScheduler.
if ascend_config.ascend_scheduler_config.enabled:
from vllm_ascend.core.schedule_config import AscendSchedulerConfig
ascend_scheduler_config = AscendSchedulerConfig.initialize_from_config(
vllm_config.scheduler_config,
ascend_config.ascend_scheduler_config)
vllm_config.scheduler_config = ascend_scheduler_config
@classmethod
def get_attn_backend_cls(cls, selected_backend, head_size, dtype,