Add accuracy and latency tests for EAGLE to CI (#3027)

2025-01-21 02:55:14 -08:00
parent ec1c21cdc4
commit a4331cd260
7 changed files with 186 additions and 123 deletions
--- a/python/sglang/test/test_utils.py
+++ b/python/sglang/test/test_utils.py
@@ -42,6 +42,9 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-In
 DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
 DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN = "Qwen/Qwen2.5-1.5B-Instruct"

+DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST = "meta-llama/Llama-2-7b-chat-hf"
+DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST = "lmzheng/sglang-EAGLE-llama2-chat-7B"
+

 def is_in_ci():
    """Return whether it is in CI runner."""
@@ -538,6 +541,7 @@ def run_bench_serving(
    random_input_len=4096,
    random_output_len=2048,
    disable_stream=False,
+    disable_ignore_eos=False,
    need_warmup=False,
 ):
    # Launch the server
@@ -572,7 +576,7 @@ def run_bench_serving(
        disable_stream=disable_stream,
        return_logprob=False,
        seed=0,
-        disable_ignore_eos=False,
+        disable_ignore_eos=disable_ignore_eos,
        extra_request_body=None,
        apply_chat_template=False,
        profile=None,