[Doc] Fix SGLang tool parser doc (#9886)

This commit is contained in:
Huapeng Zhou
2025-09-04 09:52:53 -04:00
committed by GitHub
parent ec15c8360e
commit 75ee00112d
14 changed files with 67 additions and 53 deletions

View File

@@ -70,7 +70,7 @@
" \"\"\"\n",
"python3 -m sglang.launch_server --model meta-llama/Llama-2-7b-chat-hf --speculative-algorithm EAGLE \\\n",
" --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B --speculative-num-steps 3 \\\n",
" --speculative-eagle-topk 4 --speculative-num-draft-tokens 16 --cuda-graph-max-bs 8\n",
" --speculative-eagle-topk 4 --speculative-num-draft-tokens 16 --cuda-graph-max-bs 8 --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
@@ -126,7 +126,7 @@
"python3 -m sglang.launch_server --model meta-llama/Llama-2-7b-chat-hf --speculative-algorithm EAGLE \\\n",
" --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B --speculative-num-steps 5 \\\n",
" --speculative-eagle-topk 8 --speculative-num-draft-tokens 64 --mem-fraction 0.6 \\\n",
" --enable-torch-compile --torch-compile-max-bs 2\n",
" --enable-torch-compile --torch-compile-max-bs 2 --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
@@ -186,7 +186,7 @@
"python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3-8B-Instruct --speculative-algorithm EAGLE \\\n",
" --speculative-draft-model-path lmsys/sglang-EAGLE-LLaMA3-Instruct-8B --speculative-num-steps 5 \\\n",
" --speculative-eagle-topk 8 --speculative-num-draft-tokens 64 --speculative-token-map thunlp/LLaMA3-Instruct-8B-FR-Spec/freq_32768.pt \\\n",
" --mem-fraction 0.7 --cuda-graph-max-bs 2 --dtype float16 \n",
" --mem-fraction 0.7 --cuda-graph-max-bs 2 --dtype float16 --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
@@ -242,7 +242,7 @@
"python3 -m sglang.launch_server --model meta-llama/Llama-3.1-8B-Instruct --speculative-algorithm EAGLE3 \\\n",
" --speculative-draft-model-path jamesliu1/sglang-EAGLE3-Llama-3.1-Instruct-8B --speculative-num-steps 5 \\\n",
" --speculative-eagle-topk 8 --speculative-num-draft-tokens 32 --mem-fraction 0.6 \\\n",
" --cuda-graph-max-bs 2 --dtype float16\n",
" --cuda-graph-max-bs 2 --dtype float16 --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",
@@ -297,7 +297,7 @@
" \"\"\"\n",
" python3 -m sglang.launch_server --model-path XiaomiMiMo/MiMo-7B-RL --host 0.0.0.0 --trust-remote-code \\\n",
" --speculative-algorithm EAGLE --speculative-num-steps 1 --speculative-eagle-topk 1 --speculative-num-draft-tokens 2 \\\n",
" --mem-fraction 0.5\n",
" --mem-fraction 0.5 --log-level warning\n",
"\"\"\"\n",
")\n",
"\n",