From 7799c4ca3be0f6d7541b014a1a4dfbcccb33d8b2 Mon Sep 17 00:00:00 2001 From: Icey <1790571317@qq.com> Date: Sat, 24 Jan 2026 22:49:33 +0800 Subject: [PATCH] [Fusion] change fusion env variable (#6201) ### What this PR does / why we need it? Since CI has integrated Triton, `fuse_qknorm_rope` is now enabled by default instead of being derived from `HAS_TRITON`. ### Does this PR introduce _any_ user-facing change? Yes. The default of `fuse_qknorm_rope` changes from `False` to `True`, and the value is no longer forced to `True` when Triton is installed. In environments without Triton, set it to `False` explicitly. ### How was this patch tested? CI passed with newly added and existing tests. - vLLM version: v0.14.0 - vLLM main: https://github.com/vllm-project/vllm/commit/d68209402ddab3f54a09bc1f4de9a9495a283b60 --------- Signed-off-by: wxsIcey <1790571317@qq.com> --- docs/source/user_guide/configuration/additional_config.md | 3 ++- vllm_ascend/ascend_config.py | 7 +++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/source/user_guide/configuration/additional_config.md b/docs/source/user_guide/configuration/additional_config.md index cc6a86f5..5a5f12c5 100644 --- a/docs/source/user_guide/configuration/additional_config.md +++ b/docs/source/user_guide/configuration/additional_config.md @@ -76,7 +76,8 @@ The details of each configuration option are as follows: | Name | Type | Default | Description | | ---- | ---- | ------- | ----------- | | `fuse_norm_quant` | bool | `True` | Whether to enable fuse_norm_quant pass. | -| `fuse_qknorm_rope` | bool | `False` | Whether to enable fuse_qknorm_rope pass. It's set to True by default when Triton is installed. | +| `fuse_qknorm_rope` | bool | `True` | Whether to enable fuse_qknorm_rope pass. If Triton is not in the environment, set it to False. | +| `fuse_allreduce_rms` | bool | `False` | Whether to enable fuse_allreduce_rms pass. It's set to False because of conflict with SP. 
| **eplb_config** diff --git a/vllm_ascend/ascend_config.py b/vllm_ascend/ascend_config.py index 06299fd5..1d119dbf 100644 --- a/vllm_ascend/ascend_config.py +++ b/vllm_ascend/ascend_config.py @@ -17,7 +17,6 @@ import os from typing import TYPE_CHECKING from vllm.logger import logger -from vllm.triton_utils import HAS_TRITON from vllm.utils.math_utils import cdiv if TYPE_CHECKING: @@ -190,7 +189,7 @@ class AscendCompilationConfig: """ def __init__( - self, fuse_norm_quant: bool = True, fuse_qknorm_rope: bool = False, fuse_allreduce_rms: bool = False, **kwargs + self, fuse_norm_quant: bool = True, fuse_qknorm_rope: bool = True, fuse_allreduce_rms: bool = False, **kwargs ): """ Initialize the configuration. @@ -200,13 +199,13 @@ class AscendCompilationConfig: When set to True, the system will optimize norm and quant operations. Default: True fuse_qknorm_rope (bool): Whether to enable qknorm and rope fusion optimization. - Default: False + Default: True fuse_allreduce_rms (bool): Whether to enable allreduce and addrmsnorm fusion optimization. Default: False **kwargs: Additional optional parameters for forward compatibility and configuration extension. """ self.fuse_norm_quant = fuse_norm_quant - self.fuse_qknorm_rope = HAS_TRITON or fuse_qknorm_rope + self.fuse_qknorm_rope = fuse_qknorm_rope self.fuse_allreduce_rms = fuse_allreduce_rms