[Fix] Add speculative_draft_model_revision to server_args (#5255)
Signed-off-by: Devashish Lal <devashish@rivosinc.com>
This commit is contained in:
@@ -132,8 +132,8 @@ python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algori
|
||||
# On Blackwell:
|
||||
# - Chain decoding (topk = 1) is supported on TRTLLM-MHA backend. Tree decoding (topk > 1) is in progress, stay tuned!
|
||||
# - Both tree decoding (topk > 1) and chain decoding (topk = 1) are supported on the Triton backend.
|
||||
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4 --tp 4
|
||||
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 5 --speculative-eagle-topk 4 --speculative-num-draft-tokens 8 --attention-backend triton --tp 4
|
||||
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4 --tp 4
|
||||
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 5 --speculative-eagle-topk 4 --speculative-num-draft-tokens 8 --attention-backend triton --tp 4
|
||||
```
|
||||
|
||||
Benchmark Command
|
||||
|
||||
@@ -18,7 +18,7 @@ python3 bench_sglang.py --num-questions 80
|
||||
### Benchmark sglang EAGLE
|
||||
```
|
||||
python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3-8B-Instruct --speculative-algo EAGLE \
|
||||
--speculative-draft lmsys/sglang-EAGLE-LLaMA3-Instruct-8B --speculative-num-steps 5 \
|
||||
--speculative-draft-model-path lmsys/sglang-EAGLE-LLaMA3-Instruct-8B --speculative-num-steps 5 \
|
||||
--speculative-eagle-topk 8 --speculative-num-draft-tokens 64 --dtype float16 --port 30000
|
||||
```
|
||||
|
||||
|
||||
Reference in New Issue
Block a user