[Fix] Add speculative_draft_model_revision to server_args (#5255)
Signed-off-by: Devashish Lal <devashish@rivosinc.com>
This commit is contained in:
@@ -132,8 +132,8 @@ python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algori
|
||||
# On Blackwell:
|
||||
# - Chain decoding (topk = 1) is supported on the TRTLLM-MHA backend. Tree decoding (topk > 1) is in progress; stay tuned!
|
||||
# - Both tree decoding (topk > 1) and chain decoding (topk = 1) are supported on the Triton backend.
|
||||
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4 --tp 4
|
||||
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 5 --speculative-eagle-topk 4 --speculative-num-draft-tokens 8 --attention-backend triton --tp 4
|
||||
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 3 --speculative-eagle-topk 1 --speculative-num-draft-tokens 4 --tp 4
|
||||
python3 -m sglang.launch_server --model openai/gpt-oss-120b --speculative-algo EAGLE3 --speculative-draft-model-path lmsys/EAGLE3-gpt-oss-120b-bf16 --speculative-num-steps 5 --speculative-eagle-topk 4 --speculative-num-draft-tokens 8 --attention-backend triton --tp 4
|
||||
```
|
||||
|
||||
Benchmark Command
|
||||
|
||||
Reference in New Issue
Block a user