diff --git a/vllm_ascend/platform.py b/vllm_ascend/platform.py index a67f054..e3777b8 100644 --- a/vllm_ascend/platform.py +++ b/vllm_ascend/platform.py @@ -270,6 +270,17 @@ class NPUPlatform(Platform): compilation_config.cudagraph_mode = CUDAGraphMode.NONE compilation_config.level = CompilationLevel.NO_COMPILATION + # Fail fast: reject a non-NONE cudagraph_mode when ASCEND_LAUNCH_BLOCKING is "1" (ACL graph is incompatible with it). + # TODO: Remove this check once ACL Graph supports ASCEND_LAUNCH_BLOCKING=1, then revisit error handling and user experience. + if compilation_config.cudagraph_mode != CUDAGraphMode.NONE and \ + os.environ.get("ASCEND_LAUNCH_BLOCKING", "0") == "1": + raise ValueError( + "ACL graph is incompatible with ASCEND_LAUNCH_BLOCKING=1. " + "Please unset ASCEND_LAUNCH_BLOCKING or set it to 0. If you " + "need ASCEND_LAUNCH_BLOCKING for debugging, consider other methods — " + "for example, check the plog files (default: $HOME/ascend/log/debug) " + "for more information about runtime errors.") + if parallel_config and parallel_config.worker_cls == "auto": # TODO: this is a tricky way to disable `use_sequence_parallel_moe` in vllm. os.environ["VLLM_ALL2ALL_BACKEND"] = "flashinfer_all2allv"