Update bench speculative script (#4235)

2025-03-10 03:19:01 +08:00
parent df84ab2a5b
commit f1d09a6541
1 changed file with 17 additions and 0 deletions
--- a/scripts/playground/bench_speculative.py
+++ b/scripts/playground/bench_speculative.py
@@ -2,6 +2,9 @@
 Usage:
 # single GPU
 python3 bench_speculative.py --model-path meta-llama/Llama-2-7b-chat-hf --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B
+
+# multiple GPUs
+python3 bench_speculative.py --model-path deepseek-ai/DeepSeek-V3 --speculative-draft-model-path lmsys/DeepSeek-V3-NextN --tp-size 8 --trust-remote-code --batch-size 1 4 8 16 32 --steps 0 1 2 --topk 0 1 2 4 --num_draft_tokens 0 2 4 8
 """

 import argparse
@@ -166,6 +169,20 @@ def main(args, server_args):
            ]
        )

+        if server_args.trust_remote_code:
+            other_args.extend(
+                [
+                    "--trust-remote-code",
+                ]
+            )
+
+        if server_args.enable_flashinfer_mla:
+            other_args.extend(
+                [
+                    "--enable-flashinfer-mla",
+                ]
+            )
+
        if server_args.quantization:
            other_args.extend(
                [