[PD] Add PD support for hybrid model (Qwen3-Next, DeepSeek V3.2 Exp) (#10912)

Signed-off-by: Shangming Cai <csmthu@gmail.com>
Co-authored-by: hzh0425 <hzh0425@apache.org>
Co-authored-by: ZeldaHuang <hzm414167@alibaba-inc.com>
This commit is contained in:
Shangming Cai
2025-10-16 09:59:14 +08:00
committed by GitHub
parent 97d857c096
commit 868403f642
13 changed files with 730 additions and 189 deletions

View File

@@ -807,9 +807,6 @@ class Scheduler(
self.tree_cache.cache_controller.layer_done_counter
)
elif self.is_hybrid:
assert (
self.server_args.disaggregation_mode == "null"
), "Hybrid mode does not support disaggregation yet"
self.tree_cache = SWARadixCache(
req_to_token_pool=self.req_to_token_pool,
token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
@@ -819,9 +816,6 @@ class Scheduler(
is_eagle=self.spec_algorithm.is_eagle(),
)
elif self.is_hybrid_gdn:
assert (
self.server_args.disaggregation_mode == "null"
), "Hybrid GDN mode does not support disaggregation yet"
self.tree_cache = MambaRadixCache(
req_to_token_pool=self.req_to_token_pool,
token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,