Add EAGLE accuracy and latency tests to CI (#3027)
This commit is contained in:
@@ -42,6 +42,9 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_FP8_TP2 = "neuralmagic/Meta-Llama-3.1-70B-In
|
||||
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8B-Instruct-AWQ-INT4,hugging-quants/Meta-Llama-3.1-8B-Instruct-GPTQ-INT4"
|
||||
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN = "Qwen/Qwen2.5-1.5B-Instruct"
|
||||
|
||||
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST = "meta-llama/Llama-2-7b-chat-hf"
|
||||
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST = "lmzheng/sglang-EAGLE-llama2-chat-7B"
|
||||
|
||||
|
||||
def is_in_ci():
|
||||
"""Return whether it is in CI runner."""
|
||||
@@ -538,6 +541,7 @@ def run_bench_serving(
|
||||
random_input_len=4096,
|
||||
random_output_len=2048,
|
||||
disable_stream=False,
|
||||
disable_ignore_eos=False,
|
||||
need_warmup=False,
|
||||
):
|
||||
# Launch the server
|
||||
@@ -572,7 +576,7 @@ def run_bench_serving(
|
||||
disable_stream=disable_stream,
|
||||
return_logprob=False,
|
||||
seed=0,
|
||||
disable_ignore_eos=False,
|
||||
disable_ignore_eos=disable_ignore_eos,
|
||||
extra_request_body=None,
|
||||
apply_chat_template=False,
|
||||
profile=None,
|
||||
|
||||
Reference in New Issue
Block a user