[PD] Add PD support for hybrid model (Qwen3-Next, DeepSeek V3.2 Exp) (#10912)
Signed-off-by: Shangming Cai <csmthu@gmail.com>
Co-authored-by: hzh0425 <hzh0425@apache.org>
Co-authored-by: ZeldaHuang <hzm414167@alibaba-inc.com>
This commit is contained in:
@@ -807,9 +807,6 @@ class Scheduler(
|
||||
self.tree_cache.cache_controller.layer_done_counter
|
||||
)
|
||||
elif self.is_hybrid:
|
||||
assert (
|
||||
self.server_args.disaggregation_mode == "null"
|
||||
), "Hybrid mode does not support disaggregation yet"
|
||||
self.tree_cache = SWARadixCache(
|
||||
req_to_token_pool=self.req_to_token_pool,
|
||||
token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
|
||||
@@ -819,9 +816,6 @@ class Scheduler(
|
||||
is_eagle=self.spec_algorithm.is_eagle(),
|
||||
)
|
||||
elif self.is_hybrid_gdn:
|
||||
assert (
|
||||
self.server_args.disaggregation_mode == "null"
|
||||
), "Hybrid GDN mode does not support disaggregation yet"
|
||||
self.tree_cache = MambaRadixCache(
|
||||
req_to_token_pool=self.req_to_token_pool,
|
||||
token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
|
||||
|
||||
Reference in New Issue
Block a user