Add accuracy and latency tests for EAGLE to CI (#3027)

This commit is contained in:
Lianmin Zheng
2025-01-21 02:55:14 -08:00
committed by GitHub
parent ec1c21cdc4
commit a4331cd260
7 changed files with 186 additions and 123 deletions

View File

@@ -42,6 +42,9 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-In
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN = "Qwen/Qwen2.5-1.5B-Instruct"
+DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST = "meta-llama/Llama-2-7b-chat-hf"
+DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST = "lmzheng/sglang-EAGLE-llama2-chat-7B"
def is_in_ci():
"""Return whether it is in CI runner."""
@@ -538,6 +541,7 @@ def run_bench_serving(
random_input_len=4096,
random_output_len=2048,
disable_stream=False,
+disable_ignore_eos=False,
need_warmup=False,
):
# Launch the server
@@ -572,7 +576,7 @@ def run_bench_serving(
disable_stream=disable_stream,
return_logprob=False,
seed=0,
-disable_ignore_eos=False,
+disable_ignore_eos=disable_ignore_eos,
extra_request_body=None,
apply_chat_template=False,
profile=None,