diff --git a/docs/source/user_guide/configuration/additional_config.md b/docs/source/user_guide/configuration/additional_config.md index cc6a86f5..5a5f12c5 100644 --- a/docs/source/user_guide/configuration/additional_config.md +++ b/docs/source/user_guide/configuration/additional_config.md @@ -76,7 +76,8 @@ The details of each configuration option are as follows: | Name | Type | Default | Description | | ---- | ---- | ------- | ----------- | | `fuse_norm_quant` | bool | `True` | Whether to enable fuse_norm_quant pass. | -| `fuse_qknorm_rope` | bool | `False` | Whether to enable fuse_qknorm_rope pass. It's set to True by default when Triton is installed. | +| `fuse_qknorm_rope` | bool | `True` | Whether to enable fuse_qknorm_rope pass. This pass requires Triton; set it to `False` if Triton is not installed. | +| `fuse_allreduce_rms` | bool | `False` | Whether to enable fuse_allreduce_rms pass. Disabled by default because it conflicts with SP. | **eplb_config** diff --git a/vllm_ascend/ascend_config.py b/vllm_ascend/ascend_config.py index 06299fd5..1d119dbf 100644 --- a/vllm_ascend/ascend_config.py +++ b/vllm_ascend/ascend_config.py @@ -17,7 +17,6 @@ import os from typing import TYPE_CHECKING from vllm.logger import logger -from vllm.triton_utils import HAS_TRITON from vllm.utils.math_utils import cdiv if TYPE_CHECKING: @@ -190,7 +189,7 @@ class AscendCompilationConfig: """ def __init__( - self, fuse_norm_quant: bool = True, fuse_qknorm_rope: bool = False, fuse_allreduce_rms: bool = False, **kwargs + self, fuse_norm_quant: bool = True, fuse_qknorm_rope: bool = True, fuse_allreduce_rms: bool = False, **kwargs ): """ Initialize the configuration. @@ -200,13 +199,13 @@ class AscendCompilationConfig: When set to True, the system will optimize norm and quant operations. Default: True fuse_qknorm_rope (bool): Whether to enable qknorm and rope fusion optimization. - Default: False + Default: True fuse_allreduce_rms (bool): Whether to enable allreduce and addrmsnorm fusion optimization. 
Default: False **kwargs: Additional optional parameters for forward compatibility and configuration extension. """ self.fuse_norm_quant = fuse_norm_quant - self.fuse_qknorm_rope = HAS_TRITON or fuse_qknorm_rope + self.fuse_qknorm_rope = fuse_qknorm_rope self.fuse_allreduce_rms = fuse_allreduce_rms