[Bugfix] Disable torch.compile() (#370)
### What this PR does / why we need it? To resolve this [patch](https://github.com/vllm-project/vllm-ascend/pull/236/files#diff-43b96b39b5a52fe209d86449ad703a7ff5e1349ebaf1aa12ece8d82163ee5b61R24-R49), we need to set the `torch.compile()` backend to `eager`, which disables compilation and falls back to the default PyTorch eager-mode execution path. --------- Signed-off-by: shen-shanshan <467638484@qq.com>
This commit is contained in:
@@ -40,7 +40,7 @@ class NPUPlatform(Platform):
|
||||
_enum = PlatformEnum.OOT
|
||||
device_name: str = "npu"
|
||||
device_type: str = "npu"
|
||||
simple_compile_backend: str = "npu"
|
||||
simple_compile_backend: str = "eager" # Disable torch.compile()
|
||||
ray_device_key: str = "NPU"
|
||||
device_control_env_var: str = "ASCEND_RT_VISIBLE_DEVICES"
|
||||
dispatch_key: str = "PrivateUse1"
|
||||
@@ -99,11 +99,13 @@ class NPUPlatform(Platform):
|
||||
if parallel_config.worker_cls == "auto":
|
||||
if envs.VLLM_USE_V1:
|
||||
parallel_config.worker_cls = "vllm_ascend.worker.worker_v1.NPUWorker"
|
||||
elif vllm_config.speculative_config:
|
||||
parallel_config.worker_cls = "vllm.spec_decode.spec_decode_worker.create_spec_worker"
|
||||
parallel_config.sd_worker_cls = "vllm_ascend.worker.worker.NPUWorker"
|
||||
elif vllm_config.scheduler_config.is_multi_step:
|
||||
parallel_config.worker_cls = "vllm_ascend.worker.multi_step_worker.MultiStepWorker"
|
||||
else:
|
||||
if vllm_config.scheduler_config.is_multi_step:
|
||||
parallel_config.worker_cls = "vllm_ascend.worker.multi_step_worker.MultiStepWorker"
|
||||
else:
|
||||
parallel_config.worker_cls = "vllm_ascend.worker.worker.NPUWorker"
|
||||
parallel_config.worker_cls = "vllm_ascend.worker.worker.NPUWorker"
|
||||
|
||||
cache_config = vllm_config.cache_config
|
||||
if cache_config and cache_config.block_size is None:
|
||||
|
||||
Reference in New Issue
Block a user