[EPLB][bugfix] Bugfix for fused mc2 (#6794)

### What this PR does / why we need it?
This pull request fixes a bug in the fused mc2 functionality of the
EPLB (Expert Parallelism Load Balancing) system that affected
quantization and MoE communication.
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?

- vLLM version: v0.15.0
- vLLM main:
83b47f67b1

Signed-off-by: Spicy-Stick <873805887@qq.com>
Signed-off-by: root <root@localhost.localdomain>
This commit is contained in:
JIACHENG XU
2026-03-09 11:26:57 +08:00
committed by GitHub
parent 06ec136f08
commit 23bf5d4d48
5 changed files with 50 additions and 28 deletions

View File

@@ -4,6 +4,7 @@ from unittest.mock import MagicMock, patch
import torch
from vllm_ascend.eplb.adaptor.vllm_adaptor import VllmEplbAdaptor
from vllm_ascend.quantization.methods.base import QuantType
from transformers import DeepseekV2Config
@@ -17,6 +18,8 @@ class TestVllmAdaptor(unittest.TestCase):
mock_model.get_expert_map.return_value = [i for i in range(n_routed_experts)]
mock_model.get_log2phy_map.return_value = [i for i in range(n_routed_experts)]
self.model = mock_model
num_dense_layers = getattr(config, "first_k_dense_replace", 0)
self.model.model.layers[num_dense_layers].mlp.experts.quant_type = QuantType.W8A8
self.mock_rank = patch("vllm_ascend.eplb.adaptor.vllm_adaptor.dist.get_rank", return_value=0).start()
self.mock_size = patch("vllm_ascend.eplb.adaptor.vllm_adaptor.dist.get_world_size", return_value=4).start()