enable mm allreduce test (#2192)
### What this PR does / why we need it?
This PR adds an e2e test that exercises the `npu_mm_all_reduce_base`
fusion kernel.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
Not applicable (no separate testing involved).
- vLLM version: v0.10.0
- vLLM main: 5d5d419ca6
Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
This commit is contained in:
@@ -25,6 +25,7 @@ from torch.nn.parameter import Parameter
|
||||
from vllm.distributed import (get_tensor_model_parallel_rank,
|
||||
split_tensor_along_last_dim)
|
||||
from vllm.distributed.parallel_state import get_tp_group
|
||||
from vllm.logger import logger
|
||||
from vllm.model_executor.layers.linear import RowParallelLinear
|
||||
|
||||
from vllm_ascend import envs
|
||||
@@ -142,4 +143,5 @@ class AscendRowParallelLinear(RowParallelLinear):
|
||||
|
||||
|
||||
if envs.VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE:
|
||||
logger.info("AscendRowParallelLinear: Matmul all-reduce is enabled. ")
|
||||
vllm.model_executor.layers.linear.RowParallelLinear = AscendRowParallelLinear
|
||||
|
||||
Reference in New Issue
Block a user