enable mm allreduce test (#2192)

### What this PR does / why we need it?
This PR adds an e2e test that exercises the npu_mm_all_reduce_base fusion
kernel.
### Does this PR introduce _any_ user-facing change?
no

### How was this patch tested?
not involved

- vLLM version: v0.10.0
- vLLM main: 5d5d419ca6

Signed-off-by: Ronald1995 <ronaldautomobile@163.com>
This commit is contained in:
Ronald1995
2025-08-07 17:19:23 +08:00
committed by GitHub
parent 4604882a3e
commit b2598c3271
2 changed files with 40 additions and 0 deletions

View File

@@ -25,6 +25,7 @@ from torch.nn.parameter import Parameter
from vllm.distributed import (get_tensor_model_parallel_rank,
split_tensor_along_last_dim)
from vllm.distributed.parallel_state import get_tp_group
from vllm.logger import logger
from vllm.model_executor.layers.linear import RowParallelLinear
from vllm_ascend import envs
@@ -142,4 +143,5 @@ class AscendRowParallelLinear(RowParallelLinear):
if envs.VLLM_ASCEND_ENABLE_MATMUL_ALLREDUCE:
logger.info("AscendRowParallelLinear: Matmul all-reduce is enabled. ")
vllm.model_executor.layers.linear.RowParallelLinear = AscendRowParallelLinear