Share target model embed and head weights for nextn (#4033)

2025-03-04 05:30:04 +08:00
parent 146ac8df07
commit 9fafa62db7
7 changed files with 47 additions and 45 deletions
--- a/python/sglang/srt/server_args.py
+++ b/python/sglang/srt/server_args.py
@@ -270,10 +270,11 @@ class ServerArgs:
            )

        # Speculative Decoding
-        if (
-            self.speculative_algorithm == "EAGLE"
-            or self.speculative_algorithm == "NEXTN"
-        ):
+        if self.speculative_algorithm == "NEXTN":
+            # NEXTN shares the same implementation as EAGLE
+            self.speculative_algorithm = "EAGLE"
+
+        if self.speculative_algorithm == "EAGLE":
            self.disable_overlap_schedule = True
            self.prefill_only_one_req = True
            self.disable_cuda_graph_padding = True