[speculative decoding] rename lookahead to ngram (#11010)

Co-authored-by: a4zhangfei <a4zhangfei@qq.com>
2025-09-29 12:06:59 +08:00
parent e05555fad8
commit 24f7cb1ece
22 changed files with 154 additions and 181 deletions
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -79,7 +79,7 @@ suites = {
        TestFile("test_hidden_states.py", 55),
        TestFile("test_hybrid_attn_backend.py", 100),
        TestFile("test_standalone_speculative_decoding.py", 250),
-        TestFile("test_lookahead_speculative_decoding.py", 250),
+        TestFile("test_ngram_speculative_decoding.py", 250),
        TestFile("test_input_embeddings.py", 38),
        TestFile("test_io_struct.py", 8),
        TestFile("test_jinja_template_utils.py", 1),
--- a/test/srt/test_lookahead_speculative_decoding.py
+++ b/test/srt/test_ngram_speculative_decoding.py
@@ -7,7 +7,7 @@ import requests
 from sglang.srt.utils import kill_process_tree
 from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
 from sglang.test.test_utils import (
-    DEFAULT_LOOKAHEAD_SPECULATIVE_TARGET_MODEL_FOR_TEST,
+    DEFAULT_NGRAM_SPECULATIVE_TARGET_MODEL_FOR_TEST,
    DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
    DEFAULT_URL_FOR_TEST,
    CustomTestCase,
@@ -23,7 +23,7 @@ DEFAULT_SERVER_ARGS = [
    "--cuda-graph-max-bs",
    "8",
    "--speculative-algorithm",
-    "LOOKAHEAD",
+    "NGRAM",
    "--speculative-num-draft-tokens",
    "16",
    "--mem-fraction-static",
@@ -33,7 +33,7 @@ DEFAULT_SERVER_ARGS = [

 class TestStandaloneSpeculativeDecodingBase(CustomTestCase):

-    model = DEFAULT_LOOKAHEAD_SPECULATIVE_TARGET_MODEL_FOR_TEST
+    model = DEFAULT_NGRAM_SPECULATIVE_TARGET_MODEL_FOR_TEST
    base_url = DEFAULT_URL_FOR_TEST
    accuracy_threshold = 0.79  # derived tests need to override this
    spec_decode_threshold = 1.8  # derived spec decoding tests need to override this