[bugfix]fix multistream moe in torchair (#3164)

### What this PR does / why we need it? The multistream MoE path in torchair has only been validated for decode and cannot be applied to chunked prefill, so this PR adds checks to isolate that scenario. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? - vLLM version: v0.11.0rc3 - vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0 Signed-off-by: hust17yixuan <303660421@qq.com>
2025-10-09 19:00:32 +08:00
parent 94dd832815
commit 30c5d947c3
2 changed files with 6 additions and 5 deletions
--- a/tests/ut/torchair/models/test_torchair_deepseek_v2.py
+++ b/tests/ut/torchair/models/test_torchair_deepseek_v2.py
@@ -176,7 +176,7 @@ def test_torchair_deepseek_v2_merged_replicated_linear(mock_distributed):
    TorchairDeepseekV2RowParallelLinearReplaceAllreduce,
    TorchairDeepseekV2RowParallelLinear
 ])
-def test_row_parallel_linear(cls, mock_distributed):
+def test_row_parallel_linear(cls, mock_distributed, mock_forward_context):
    linear = cls(input_size=128, output_size=64, bias=False, quant_config=None)
    linear.quant_method = Mock()
    linear.quant_method.apply.return_value = torch.randn(2, 4, 64)
@@ -282,7 +282,7 @@ def test_torchair_deepseek_v2_decoder_layer(mock_maybe_chunk_residual,
                                            mock_maybe_wait_prefetch_done,
                                            mock_rms_norm, mock_add_norm,
                                            mock_distributed, base_config,
-                                            vllm_config):
+                                            vllm_config, mock_forward_context):
    mock_rms_norm.return_value = (torch.randn(2, 128), torch.randn(2, 128))
    mock_add_norm.return_value = (torch.randn(2, 128), torch.randn(2, 128),
                                  torch.randn(2, 128))