Clean up useless torchair logic (#4856)

This PR cleans up useless torchair logic in the model runner. The moge doc
applies only to torchair, so it can be removed as well.

- vLLM version: v0.12.0
- vLLM main:
ad32e3e19c

Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com>
Co-authored-by: Mengqing Cao <cmq0113@163.com>
This commit is contained in:
wangxiyuan
2025-12-11 11:21:13 +08:00
committed by GitHub
parent c12eb22cbe
commit bb76f7962c
7 changed files with 22 additions and 307 deletions

View File

@@ -26,7 +26,7 @@ from vllm.platforms import Platform, PlatformEnum
# todo: please remove it when solve cuda hard code in vllm
os.environ["VLLM_DISABLE_SHARED_EXPERTS_STREAM"] = "1"
from vllm_ascend.ascend_config import check_ascend_config, init_ascend_config
from vllm_ascend.ascend_config import init_ascend_config
from vllm_ascend.utils import refresh_block_size
# isort: off
@@ -181,7 +181,6 @@ class NPUPlatform(Platform):
else:
enforce_eager = getattr(model_config, "enforce_eager", False)
check_ascend_config(vllm_config, enforce_eager)
from vllm.config.compilation import CUDAGraphMode
if enforce_eager:
logger.info("Compilation disabled, using eager mode by default")