[BugFix][Fusion] Fix graph fusion failure problem (#5676)
The vLLM pull request
(https://github.com/vllm-project/vllm/pull/24252) currently causes
operator fusion to fail. The issue was previously worked around by
patching the backend. The root cause has now been identified, and this
pull request resolves the problem at its source.
- vLLM version: v0.13.0
- vLLM main: 2f4e6548ef
---------
Signed-off-by: wxsIcey <1790571317@qq.com>
@@ -28,7 +28,7 @@ import torch_npu
 import vllm.envs as envs_vllm
 from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions
 from torch_npu.profiler import dynamic_profile as dp
-from vllm.config import VllmConfig, set_current_vllm_config
+from vllm.config import CUDAGraphMode, VllmConfig, set_current_vllm_config
 from vllm.distributed import (ensure_model_parallel_initialized,
                               init_distributed_environment)
 from vllm.distributed.ec_transfer import ensure_ec_transfer_initialized
@@ -381,10 +381,25 @@ class NPUWorker(WorkerBase):
         warmup_sizes = (self.vllm_config.compilation_config.compile_sizes
                         or []).copy()
-        if not self.model_config.enforce_eager:
-            warmup_sizes = [
-                x for x in warmup_sizes if x not in
-                self.vllm_config.compilation_config.cudagraph_capture_sizes
-            ]
+        cg_capture_sizes: list[int] = []
+        if self.vllm_config.compilation_config.cudagraph_mode != CUDAGraphMode.NONE:
+            cg_sizes = self.vllm_config.compilation_config.cudagraph_capture_sizes
+            cg_capture_sizes = [] if cg_sizes is None else cg_sizes
+            warmup_sizes = [
+                x for x in warmup_sizes if x not in cg_capture_sizes
+            ]
+
+        compile_ranges = self.vllm_config.compilation_config.get_compile_ranges(
+        )
+        # For each compile_range, if none of the batch sizes
+        # in warmup_sizes or cudagraph_capture_sizes are in the range,
+        # add the end of the range to ensure compilation/warmup.
+        all_sizes = set(cg_capture_sizes)
+        all_sizes.update([x for x in warmup_sizes if isinstance(x, int)])
+        for compile_range in compile_ranges:
+            if not any(x in compile_range for x in all_sizes):
+                warmup_sizes.append(compile_range.end)
+
         for size in sorted(warmup_sizes, reverse=True):
             logger.info("Compile and warming up model for size %d", size)
             self.model_runner._dummy_run(size)
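For reference, below is a minimal standalone sketch of the warmup-size
selection logic added in this hunk. CompileRange here is a hypothetical
stand-in for the objects returned by vLLM's
compilation_config.get_compile_ranges(), and its half-open (start, end]
semantics are an assumption made purely for illustration; only the
filter-then-backfill flow mirrors the diff above.

# Minimal sketch (not vLLM code). CompileRange is a hypothetical
# stand-in for vLLM's compile-range objects; (start, end] semantics
# are assumed for illustration only.
from dataclasses import dataclass

@dataclass
class CompileRange:
    start: int
    end: int

    def __contains__(self, x: int) -> bool:
        # Half-open range (start, end]: assumed semantics.
        return self.start < x <= self.end

def select_warmup_sizes(compile_sizes, cudagraph_capture_sizes, compile_ranges):
    # Sizes already captured as graphs get warmed up during capture,
    # so they are filtered out of the explicit warmup list.
    warmup_sizes = [x for x in compile_sizes if x not in cudagraph_capture_sizes]
    # Ensure every compile range is exercised at least once: if no
    # known size falls inside a range, warm up the range's end.
    all_sizes = set(cudagraph_capture_sizes)
    all_sizes.update(x for x in warmup_sizes if isinstance(x, int))
    for r in compile_ranges:
        if not any(x in r for x in all_sizes):
            warmup_sizes.append(r.end)
    return sorted(warmup_sizes, reverse=True)

# The (8, 64] range contains no configured size, so its end (64) is
# appended to guarantee that range is compiled and warmed up.
print(select_warmup_sizes(
    compile_sizes=[1, 2, 4],
    cudagraph_capture_sizes=[1, 2],
    compile_ranges=[CompileRange(0, 8), CompileRange(8, 64)],
))  # -> [64, 4]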