[PD] Add PD support for hybrid model (Qwen3-Next, DeepSeek V3.2 Exp) (#10912)

Signed-off-by: Shangming Cai <csmthu@gmail.com>
Co-authored-by: hzh0425 <hzh0425@apache.org>
Co-authored-by: ZeldaHuang <hzm414167@alibaba-inc.com>
2025-10-16 09:59:14 +08:00
parent 97d857c096
commit 868403f642
13 changed files with 730 additions and 189 deletions
--- a/python/sglang/srt/managers/scheduler.py
+++ b/python/sglang/srt/managers/scheduler.py
@@ -807,9 +807,6 @@ class Scheduler(
                    self.tree_cache.cache_controller.layer_done_counter
                )
            elif self.is_hybrid:
-                assert (
-                    self.server_args.disaggregation_mode == "null"
-                ), "Hybrid mode does not support disaggregation yet"
                self.tree_cache = SWARadixCache(
                    req_to_token_pool=self.req_to_token_pool,
                    token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,
@@ -819,9 +816,6 @@ class Scheduler(
                    is_eagle=self.spec_algorithm.is_eagle(),
                )
            elif self.is_hybrid_gdn:
-                assert (
-                    self.server_args.disaggregation_mode == "null"
-                ), "Hybrid GDN mode does not support disaggregation yet"
                self.tree_cache = MambaRadixCache(
                    req_to_token_pool=self.req_to_token_pool,
                    token_to_kv_pool_allocator=self.token_to_kv_pool_allocator,