cleanup useless torchair logic (#4856)

This PR clean up useless torchair logic in model runner. The moge doc is only for torchair, it can be removed as well. - vLLM version: v0.12.0 - vLLM main: ad32e3e19c Signed-off-by: wangxiyuan <wangxiyuan1007@gmail.com> Co-authored-by: Mengqing Cao <cmq0113@163.com>
2025-12-11 11:21:13 +08:00
parent c12eb22cbe
commit bb76f7962c
7 changed files with 22 additions and 307 deletions
--- a/vllm_ascend/platform.py
+++ b/vllm_ascend/platform.py
@@ -26,7 +26,7 @@ from vllm.platforms import Platform, PlatformEnum
 # todo: please remove it when solve cuda hard code in vllm
 os.environ["VLLM_DISABLE_SHARED_EXPERTS_STREAM"] = "1"

-from vllm_ascend.ascend_config import check_ascend_config, init_ascend_config
+from vllm_ascend.ascend_config import init_ascend_config
 from vllm_ascend.utils import refresh_block_size

 # isort: off
@@ -181,7 +181,6 @@ class NPUPlatform(Platform):
        else:
            enforce_eager = getattr(model_config, "enforce_eager", False)

-        check_ascend_config(vllm_config, enforce_eager)
        from vllm.config.compilation import CUDAGraphMode
        if enforce_eager:
            logger.info("Compilation disabled, using eager mode by default")