Share target model embed and head weights for nextn (#4033)

This commit is contained in:
Ke Bao
2025-03-04 05:30:04 +08:00
committed by GitHub
parent 146ac8df07
commit 9fafa62db7
7 changed files with 47 additions and 45 deletions

View File

@@ -270,10 +270,11 @@ class ServerArgs:
)
# Speculative Decoding
if (
self.speculative_algorithm == "EAGLE"
or self.speculative_algorithm == "NEXTN"
):
if self.speculative_algorithm == "NEXTN":
# NEXTN shares the same implementation as EAGLE
self.speculative_algorithm = "EAGLE"
if self.speculative_algorithm == "EAGLE":
self.disable_overlap_schedule = True
self.prefill_only_one_req = True
self.disable_cuda_graph_padding = True