[PD] Add PD support for hybrid models (Qwen3-Next, DeepSeek V3.2 Exp) (#10912)

Signed-off-by: Shangming Cai <csmthu@gmail.com>
Co-authored-by: hzh0425 <hzh0425@apache.org>
Co-authored-by: ZeldaHuang <hzm414167@alibaba-inc.com>
This commit is contained in:
Shangming Cai
2025-10-16 09:59:14 +08:00
committed by GitHub
parent 97d857c096
commit 868403f642
13 changed files with 730 additions and 189 deletions

View File

@@ -42,6 +42,7 @@ class TestMamba(unittest.TestCase):
full_attention_layer_ids=full_attention_layer_ids,
enable_kvcache_transpose=False,
device=device,
mamba_pool=None,
)
assert pool._transfer_full_attention_id(global_interval - 1) == 0
assert pool._transfer_full_attention_id(2 * global_interval - 1) == 1
@@ -173,6 +174,7 @@ class TestMamba(unittest.TestCase):
full_attention_layer_ids=full_attention_layer_ids,
enable_kvcache_transpose=False,
device=device,
mamba_pool=req_to_token_pool.mamba_pool,
)
# setup token to kv pool allocator