Revert "[BugFix][Fusion] Fix graph fusion failure problem (#5253)" (#5667)

### What this PR does / why we need it?

Revert PR #5253 to fix the smoke test failure.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

It was tested against the failing case.

Signed-off-by: Rifa <865071616@qq.com>
This commit is contained in:
Fager10086
2026-01-06 21:55:47 +08:00
committed by GitHub
parent 330e25ab1d
commit 77a029979e
9 changed files with 267 additions and 36 deletions

View File

@@ -27,7 +27,7 @@ import torch_npu
import vllm.envs as envs_vllm
from torch_npu.op_plugin.atb._atb_ops import _register_atb_extensions
from torch_npu.profiler import dynamic_profile as dp
from vllm.config import CUDAGraphMode, VllmConfig, set_current_vllm_config
from vllm.config import VllmConfig, set_current_vllm_config
from vllm.distributed import (ensure_model_parallel_initialized,
init_distributed_environment)
from vllm.distributed.ec_transfer import ensure_ec_transfer_initialized
@@ -372,25 +372,11 @@ class NPUWorker(WorkerBase):
self.model_runner.eplb_warmup()
warmup_sizes = (self.vllm_config.compilation_config.compile_sizes
or []).copy()
cg_capture_sizes: list[int] = []
if self.vllm_config.compilation_config.cudagraph_mode != CUDAGraphMode.NONE:
cg_sizes = self.vllm_config.compilation_config.cudagraph_capture_sizes
cg_capture_sizes = [] if cg_sizes is None else cg_sizes
if not self.model_config.enforce_eager:
warmup_sizes = [
x for x in warmup_sizes if x not in cg_capture_sizes
x for x in warmup_sizes if x not in
self.vllm_config.compilation_config.cudagraph_capture_sizes
]
compile_ranges = self.vllm_config.compilation_config.get_compile_ranges(
)
# For each compile_range, if none of the batch sizes
# in warmup_sizes or cudagraph_capture_sizes are in the range,
# add the end of the range to ensure compilation/warmup.
all_sizes = set(cg_capture_sizes)
all_sizes.update([x for x in warmup_sizes if isinstance(x, int)])
for compile_range in compile_ranges:
if not any(x in compile_range for x in all_sizes):
warmup_sizes.append(compile_range.end)
for size in sorted(warmup_sizes, reverse=True):
logger.info("Compile and warming up model for size %d", size)
self.model_runner._dummy_run(size)