[Feat][Graph] Support DeepSeek with ACL Graph (#2707)
### What this PR does / why we need it?
In memory of #677, a long-overdue milestone. DeepSeek V3/R1 should now
work with ACL Graph.
### Does this PR introduce _any_ user-facing change?
None.
### How was this patch tested?
Working on it.
- vLLM version: v0.10.2
- vLLM main: 68dbde5dbb
---------
Signed-off-by: Yizhou Liu <liu_yizhou@outlook.com>
This commit is contained in:
@@ -41,9 +41,10 @@ def test_row_parallel_linear(cls, mock_distributed):
|
||||
assert output[0].shape == (2, 4, 64)
|
||||
|
||||
|
||||
@patch("torch.ops.vllm.mla_forward")
|
||||
@patch("torch_npu.npu_rms_norm")
|
||||
def test_custom_deepseek_v2_mla_attention(mock_rms_norm, mock_distributed,
|
||||
base_config):
|
||||
def test_custom_deepseek_v2_mla_attention(mock_rms_norm, mock_mla_forward,
|
||||
mock_distributed, base_config):
|
||||
mock_rms_norm.return_value = (torch.randn(2, 128), torch.randn(2, 128))
|
||||
|
||||
attn = CustomDeepseekV2MLAAttention(config=base_config,
|
||||
@@ -64,8 +65,8 @@ def test_custom_deepseek_v2_mla_attention(mock_rms_norm, mock_distributed,
|
||||
with patch.object(attn.mla_attn,
|
||||
"__call__",
|
||||
return_value=torch.randn(2, 4, 128)):
|
||||
with pytest.raises(AssertionError):
|
||||
attn(positions, x)
|
||||
attn(positions, x)
|
||||
mock_mla_forward.assert_called_once()
|
||||
|
||||
attn = CustomDeepseekV2MLAAttention(config=base_config,
|
||||
hidden_size=128,
|
||||
|
||||
@@ -215,21 +215,6 @@ class TestAscendConfig(TestBase):
|
||||
test_vllm_config.model_config = fake_model_config
|
||||
init_ascend_config(test_vllm_config)
|
||||
check_ascend_config(test_vllm_config, False)
|
||||
# aclgraph + deepseek model
|
||||
with self.assertRaises(NotImplementedError):
|
||||
test_vllm_config.additional_config = {
|
||||
"torchair_graph_config": {
|
||||
"enabled": False,
|
||||
},
|
||||
"refresh": True
|
||||
}
|
||||
model_path = os.path.join(os.path.dirname(__file__), "fake_weight")
|
||||
fake_model_config = ModelConfig(model=model_path)
|
||||
fake_model_config.hf_config = PretrainedConfig()
|
||||
fake_model_config.hf_config.model_type = "deepseek"
|
||||
test_vllm_config.model_config = fake_model_config
|
||||
init_ascend_config(test_vllm_config)
|
||||
check_ascend_config(test_vllm_config, False)
|
||||
|
||||
def test_check_torchair_supported(self):
|
||||
test_cases = [('deepseek_v3', True), ('PanguProMoE', True),
|
||||
|
||||
Reference in New Issue
Block a user