[PD] Add PD support for hybrid model (Qwen3-Next, DeepSeek V3.2 Exp) (#10912)
Signed-off-by: Shangming Cai <csmthu@gmail.com>
Co-authored-by: hzh0425 <hzh0425@apache.org>
Co-authored-by: ZeldaHuang <hzm414167@alibaba-inc.com>
This commit is contained in:
@@ -42,6 +42,7 @@ class TestMamba(unittest.TestCase):
|
||||
full_attention_layer_ids=full_attention_layer_ids,
|
||||
enable_kvcache_transpose=False,
|
||||
device=device,
|
||||
mamba_pool=None,
|
||||
)
|
||||
assert pool._transfer_full_attention_id(global_interval - 1) == 0
|
||||
assert pool._transfer_full_attention_id(2 * global_interval - 1) == 1
|
||||
@@ -173,6 +174,7 @@ class TestMamba(unittest.TestCase):
|
||||
full_attention_layer_ids=full_attention_layer_ids,
|
||||
enable_kvcache_transpose=False,
|
||||
device=device,
|
||||
mamba_pool=req_to_token_pool.mamba_pool,
|
||||
)
|
||||
|
||||
# setup token to kv pool allocator
|
||||
|
||||
Reference in New Issue
Block a user