### What this PR does / why we need it?
This reverts commit bf87606932.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Tested with end-to-end vLLM serving with `enable_shared_expert_dp: true` in
eager mode, as before.
- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0
Signed-off-by: linfeng-yuan <1102311262@qq.com>
This commit is contained in:
@@ -1,5 +1,4 @@
|
||||
import unittest
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
@@ -54,9 +53,7 @@ class TestAscendRMSNorm(PytestBase):
|
||||
# Test case for the most common and basic scenario
|
||||
@pytest.mark.parametrize(
|
||||
"residual", [None, torch.randn(4, 8, dtype=torch.float16)])
|
||||
@patch("torch.ops.vllm.maybe_chunk_residual")
|
||||
def test_forward_oot_basic(self, mock_maybe_chunk_residual, residual):
|
||||
mock_maybe_chunk_residual.side_effect = lambda x, residual: residual
|
||||
def test_forward_oot_basic(self, residual):
|
||||
layer = RMSNorm(hidden_size=8, eps=1e-05)
|
||||
x = torch.randn(4, 8, dtype=torch.float16)
|
||||
if residual is not None:
|
||||
@@ -120,8 +117,6 @@ class TestAscendRMSNorm(PytestBase):
|
||||
mock_forward_context.layer_idx = 0
|
||||
mock_forward_context.num_hidden_layers = num_hidden_layers
|
||||
mock_forward_context.fusion_linear = "gate_up_dense"
|
||||
mocker.patch("torch.ops.vllm.maybe_chunk_residual",
|
||||
lambda x, residual: residual)
|
||||
|
||||
# Ensure fusion and layer_idx increment are handled correctly
|
||||
x = torch.randn(4, 8, dtype=torch.float16)
|
||||
|
||||
Reference in New Issue
Block a user