[2/N][refactor] torchair deepseek mla backend refactor (#2459)
### What this PR does / why we need it?
This PR move current unified mla backend to torchair folder and remove
torchair-related code in attention/mla_v1.py (1.3k -> 0.9k).
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
Running eager mode with mla backend, and torchair mode with code before
[2445](https://github.com/vllm-project/vllm-ascend/pull/2445)
- vLLM version: v0.10.0
- vLLM main:
f571ff8eb6
Signed-off-by: linfeng-yuan <1102311262@qq.com>
This commit is contained in:
@@ -235,12 +235,18 @@ class NPUPlatform(Platform):
|
||||
raise ValueError("vLLM Ascend does not support V0 engine.")
|
||||
|
||||
use_torchair = get_ascend_config().torchair_graph_config.enabled
|
||||
if use_mla:
|
||||
return "vllm_ascend.attention.mla_v1.AscendMLABackend"
|
||||
elif use_torchair:
|
||||
return "vllm_ascend.torchair.torchair_attention.AscendAttentionTorchairBackend"
|
||||
else:
|
||||
return "vllm_ascend.attention.attention_v1.AscendAttentionBackend"
|
||||
# choose attention backend based on use_mla and use_torchair
|
||||
backend_map = {
|
||||
(True, True):
|
||||
"vllm_ascend.torchair.torchair_mla.AscendMLATorchairBackend",
|
||||
(True, False):
|
||||
"vllm_ascend.attention.mla_v1.AscendMLABackend",
|
||||
(False, True):
|
||||
"vllm_ascend.torchair.torchair_attention.AscendAttentionTorchairBackend",
|
||||
(False, False):
|
||||
"vllm_ascend.attention.attention_v1.AscendAttentionBackend"
|
||||
}
|
||||
return backend_map[(use_mla, use_torchair)]
|
||||
|
||||
@classmethod
|
||||
def get_punica_wrapper(cls) -> str:
|
||||
|
||||
Reference in New Issue
Block a user