[Feature] Support Xiaomi MIMO Flash V2 (#62)

* [Feature] Support MIMO Flash V2
2025-12-31 10:16:33 +08:00
parent 341dc7f296
commit b3c30a3cb9
12 changed files with 1530 additions and 690 deletions
--- a/vllm_kunlun/ops/paged_attn.py
+++ b/vllm_kunlun/ops/paged_attn.py
@@ -8,14 +8,8 @@ from typing import List, Optional, Tuple
 from vllm.platforms import current_platform


-if current_platform.is_kunlun():
-    from vllm_kunlun.ops._kunlun_ops import KunlunOps as ops
-else:
-    from vllm import _custom_ops as ops
-    from vllm.triton_utils.importing import HAS_TRITON
+from vllm_kunlun.ops._kunlun_ops import KunlunOps as ops

-    if HAS_TRITON:
-        from vllm.attention.ops.prefix_prefill import context_attention_fwd

 # Should be the same as PARTITION_SIZE in `paged_attention_v2_launcher`.
 _PARTITION_SIZE = 512