From f1d09a654158d7a041ec328d907861ad816383e8 Mon Sep 17 00:00:00 2001 From: Ke Bao Date: Mon, 10 Mar 2025 03:19:01 +0800 Subject: [PATCH] Update bench speculative script (#4235) --- scripts/playground/bench_speculative.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/scripts/playground/bench_speculative.py b/scripts/playground/bench_speculative.py index 812ad1a82..74370b78d 100644 --- a/scripts/playground/bench_speculative.py +++ b/scripts/playground/bench_speculative.py @@ -2,6 +2,9 @@ Usage: # single GPU python3 bench_speculative.py --model-path meta-llama/Llama-2-7b-chat-hf --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B + +# multiple GPU +python3 bench_speculative.py --model-path deepseek-ai/DeepSeek-V3 --speculative-draft-model-path lmsys/DeepSeek-V3-NextN --tp-size 8 --trust-remote-code --batch-size 1 4 8 16 32 --steps 0 1 2 --topk 0 1 2 4 --num_draft_tokens 0 2 4 8 """ import argparse @@ -166,6 +169,20 @@ def main(args, server_args): ] ) + if server_args.trust_remote_code: + other_args.extend( + [ + "--trust-remote-code", + ] + ) + + if server_args.enable_flashinfer_mla: + other_args.extend( + [ + "--enable-flashinfer-mla", + ] + ) + if server_args.quantization: other_args.extend( [