[Main2Main] Upgrade vLLM to 0226 (#6813)
### What this PR does / why we need it?
Breaking changes from upstream vLLM:
1. https://github.com/vllm-project/vllm/pull/33452
2. https://github.com/vllm-project/vllm/pull/33451
3. https://github.com/vllm-project/vllm/pull/32567
4. https://github.com/vllm-project/vllm/pull/32344
### Does this PR introduce _any_ user-facing change?
### How was this patch tested?
- vLLM version: v0.15.0
- vLLM main: 83b47f67b1
---------
Signed-off-by: MrZ20 <2609716663@qq.com>
Signed-off-by: gcanlin <canlinguosdu@gmail.com>
Co-authored-by: MrZ20 <2609716663@qq.com>
This commit is contained in:
@@ -16,6 +16,8 @@
|
||||
#
|
||||
import torch
|
||||
from torch._inductor.pattern_matcher import Match, PatternMatcherPass, PatternPrettyPrinter
|
||||
from vllm.compilation.passes.inductor_pass import get_pass_context
|
||||
from vllm.compilation.passes.vllm_inductor_pass import VllmInductorPass
|
||||
from vllm.config import VllmConfig
|
||||
from vllm.config.compilation import Range
|
||||
from vllm.distributed import get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce
|
||||
@@ -24,14 +26,6 @@ from vllm.logger import logger
|
||||
|
||||
from vllm_ascend.compilation.passes.base_pattern import BasePattern
|
||||
from vllm_ascend.compilation.passes.utils.npugraph_ex_utils_check import extra_stream_scope_check
|
||||
from vllm_ascend.utils import vllm_version_is
|
||||
|
||||
if vllm_version_is("0.15.0"):
|
||||
from vllm.compilation.inductor_pass import get_pass_context # type: ignore
|
||||
from vllm.compilation.vllm_inductor_pass import VllmInductorPass # type: ignore
|
||||
else:
|
||||
from vllm.compilation.passes.inductor_pass import get_pass_context
|
||||
from vllm.compilation.passes.vllm_inductor_pass import VllmInductorPass
|
||||
|
||||
# The computation-communication tiling block size is 512.
|
||||
ALLREDUCE_NORM_FUSE_THRESHOLD = 512
|
||||
|
||||
Reference in New Issue
Block a user