[speculative decoding] rename lookahead to ngram (#11010)
Co-authored-by: a4zhangfei <a4zhangfei@qq.com>
This commit is contained in:
@@ -79,7 +79,7 @@ suites = {
|
||||
TestFile("test_hidden_states.py", 55),
|
||||
TestFile("test_hybrid_attn_backend.py", 100),
|
||||
TestFile("test_standalone_speculative_decoding.py", 250),
|
||||
-TestFile("test_lookahead_speculative_decoding.py", 250),
|
||||
+TestFile("test_ngram_speculative_decoding.py", 250),
|
||||
TestFile("test_input_embeddings.py", 38),
|
||||
TestFile("test_io_struct.py", 8),
|
||||
TestFile("test_jinja_template_utils.py", 1),
|
||||
|
||||
@@ -7,7 +7,7 @@ import requests
|
||||
from sglang.srt.utils import kill_process_tree
|
||||
from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
|
||||
from sglang.test.test_utils import (
|
||||
-DEFAULT_LOOKAHEAD_SPECULATIVE_TARGET_MODEL_FOR_TEST,
|
||||
+DEFAULT_NGRAM_SPECULATIVE_TARGET_MODEL_FOR_TEST,
|
||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
DEFAULT_URL_FOR_TEST,
|
||||
CustomTestCase,
|
||||
@@ -23,7 +23,7 @@ DEFAULT_SERVER_ARGS = [
|
||||
"--cuda-graph-max-bs",
|
||||
"8",
|
||||
"--speculative-algorithm",
|
||||
-"LOOKAHEAD",
|
||||
+"NGRAM",
|
||||
"--speculative-num-draft-tokens",
|
||||
"16",
|
||||
"--mem-fraction-static",
|
||||
@@ -33,7 +33,7 @@ DEFAULT_SERVER_ARGS = [
|
||||
|
||||
class TestStandaloneSpeculativeDecodingBase(CustomTestCase):
|
||||
|
||||
-model = DEFAULT_LOOKAHEAD_SPECULATIVE_TARGET_MODEL_FOR_TEST
|
||||
+model = DEFAULT_NGRAM_SPECULATIVE_TARGET_MODEL_FOR_TEST
|
||||
base_url = DEFAULT_URL_FOR_TEST
|
||||
accuracy_threshold = 0.79 # derived tests need to override this
|
||||
spec_decode_threshold = 1.8 # derived spec decoding tests need to override this
|
||||
Reference in New Issue
Block a user