[Feat][Graph] Support DeepSeek with ACL Graph (#2707)

### What this PR does / why we need it?
In memory of #677, a long-overdue milestone. DeepSeek V3/R1 should now work with ACL Graph.

### Does this PR introduce _any_ user-facing change?
None.

### How was this patch tested?
Working on it.

- vLLM version: v0.10.2
- vLLM main: 68dbde5dbb

---------

Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
2025-09-16 17:50:17 +08:00
parent 3e60aa5483
commit 88ca8a051c
7 changed files with 64 additions and 42 deletions
--- a/tests/ut/models/test_deepseek_v2.py
+++ b/tests/ut/models/test_deepseek_v2.py
@@ -41,9 +41,10 @@ def test_row_parallel_linear(cls, mock_distributed):
    assert output[0].shape == (2, 4, 64)


+@patch("torch.ops.vllm.mla_forward")
@patch("torch_npu.npu_rms_norm")
-def test_custom_deepseek_v2_mla_attention(mock_rms_norm, mock_distributed,
-                                          base_config):
+def test_custom_deepseek_v2_mla_attention(mock_rms_norm, mock_mla_forward,
+                                          mock_distributed, base_config):
    mock_rms_norm.return_value = (torch.randn(2, 128), torch.randn(2, 128))

    attn = CustomDeepseekV2MLAAttention(config=base_config,
@@ -64,8 +65,8 @@ def test_custom_deepseek_v2_mla_attention(mock_rms_norm, mock_distributed,
    with patch.object(attn.mla_attn,
                      "__call__",
                      return_value=torch.randn(2, 4, 128)):
-        with pytest.raises(AssertionError):
-            attn(positions, x)
+        attn(positions, x)
+        mock_mla_forward.assert_called_once()

    attn = CustomDeepseekV2MLAAttention(config=base_config,
                                        hidden_size=128,
--- a/tests/ut/test_ascend_config.py
+++ b/tests/ut/test_ascend_config.py
@@ -215,21 +215,6 @@ class TestAscendConfig(TestBase):
            test_vllm_config.model_config = fake_model_config
            init_ascend_config(test_vllm_config)
            check_ascend_config(test_vllm_config, False)
-        # aclgraph + deepseek model
-        with self.assertRaises(NotImplementedError):
-            test_vllm_config.additional_config = {
-                "torchair_graph_config": {
-                    "enabled": False,
-                },
-                "refresh": True
-            }
-            model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
-            fake_model_config = ModelConfig(model=model_path)
-            fake_model_config.hf_config = PretrainedConfig()
-            fake_model_config.hf_config.model_type = "deepseek"
-            test_vllm_config.model_config = fake_model_config
-            init_ascend_config(test_vllm_config)
-            check_ascend_config(test_vllm_config, False)

    def test_check_torchair_supported(self):
        test_cases = [('deepseek_v3', True), ('PanguProMoE', True),