[Feat][Graph] Support DeepSeek with ACL Graph (#2707)

### What this PR does / why we need it?
In memory of #677, a long overdue milestone. Now DeepSeek V3/R1 should
be OK with ACL Graph.

### Does this PR introduce _any_ user-facing change?
None.

### How was this patch tested?
Working on it.

- vLLM version: v0.10.2
- vLLM main: 68dbde5dbb

---------

Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
This commit is contained in:
yiz-liu
2025-09-16 17:50:17 +08:00
committed by GitHub
parent 3e60aa5483
commit 88ca8a051c
7 changed files with 64 additions and 42 deletions

View File

@@ -41,9 +41,10 @@ def test_row_parallel_linear(cls, mock_distributed):
assert output[0].shape == (2, 4, 64)
@patch("torch.ops.vllm.mla_forward")
@patch("torch_npu.npu_rms_norm")
def test_custom_deepseek_v2_mla_attention(mock_rms_norm, mock_distributed,
base_config):
def test_custom_deepseek_v2_mla_attention(mock_rms_norm, mock_mla_forward,
mock_distributed, base_config):
mock_rms_norm.return_value = (torch.randn(2, 128), torch.randn(2, 128))
attn = CustomDeepseekV2MLAAttention(config=base_config,
@@ -64,8 +65,8 @@ def test_custom_deepseek_v2_mla_attention(mock_rms_norm, mock_distributed,
with patch.object(attn.mla_attn,
"__call__",
return_value=torch.randn(2, 4, 128)):
with pytest.raises(AssertionError):
attn(positions, x)
attn(positions, x)
mock_mla_forward.assert_called_once()
attn = CustomDeepseekV2MLAAttention(config=base_config,
hidden_size=128,

View File

@@ -215,21 +215,6 @@ class TestAscendConfig(TestBase):
test_vllm_config.model_config = fake_model_config
init_ascend_config(test_vllm_config)
check_ascend_config(test_vllm_config, False)
# aclgraph + deepseek model
with self.assertRaises(NotImplementedError):
test_vllm_config.additional_config = {
"torchair_graph_config": {
"enabled": False,
},
"refresh": True
}
model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
fake_model_config = ModelConfig(model=model_path)
fake_model_config.hf_config = PretrainedConfig()
fake_model_config.hf_config.model_type = "deepseek"
test_vllm_config.model_config = fake_model_config
init_ascend_config(test_vllm_config)
check_ascend_config(test_vllm_config, False)
def test_check_torchair_supported(self):
test_cases = [('deepseek_v3', True), ('PanguProMoE', True),