Share target model embed and head weights for nextn (#4033)

2025-03-04 05:30:04 +08:00
parent 146ac8df07
commit 9fafa62db7
7 changed files with 47 additions and 45 deletions
--- a/python/sglang/srt/model_executor/forward_batch_info.py
+++ b/python/sglang/srt/model_executor/forward_batch_info.py
@@ -280,7 +280,8 @@ class ForwardBatch:
            ).to(device, non_blocking=True)
            if (
                model_runner.server_args.attention_backend != "torch_native"
-                and model_runner.server_args.speculative_algorithm != "NEXTN"
+                # TODO: Fix triton kernel illegal memory access for EAGLE
+                and model_runner.server_args.speculative_algorithm != "EAGLE"
            ):
                ret.extend_num_tokens = batch.extend_num_tokens
                positions, ret.extend_start_loc = compute_position_triton(