[Doc] Fix SGLang tool parser doc (#9886)
This commit is contained in:
@@ -70,7 +70,7 @@
|
||||
" \"\"\"\n",
|
||||
"python3 -m sglang.launch_server --model meta-llama/Llama-2-7b-chat-hf --speculative-algorithm EAGLE \\\n",
|
||||
" --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B --speculative-num-steps 3 \\\n",
|
||||
" --speculative-eagle-topk 4 --speculative-num-draft-tokens 16 --cuda-graph-max-bs 8\n",
|
||||
" --speculative-eagle-topk 4 --speculative-num-draft-tokens 16 --cuda-graph-max-bs 8 --log-level warning\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
@@ -126,7 +126,7 @@
|
||||
"python3 -m sglang.launch_server --model meta-llama/Llama-2-7b-chat-hf --speculative-algorithm EAGLE \\\n",
|
||||
" --speculative-draft-model-path lmsys/sglang-EAGLE-llama2-chat-7B --speculative-num-steps 5 \\\n",
|
||||
" --speculative-eagle-topk 8 --speculative-num-draft-tokens 64 --mem-fraction 0.6 \\\n",
|
||||
" --enable-torch-compile --torch-compile-max-bs 2\n",
|
||||
" --enable-torch-compile --torch-compile-max-bs 2 --log-level warning\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
@@ -186,7 +186,7 @@
|
||||
"python3 -m sglang.launch_server --model meta-llama/Meta-Llama-3-8B-Instruct --speculative-algorithm EAGLE \\\n",
|
||||
" --speculative-draft-model-path lmsys/sglang-EAGLE-LLaMA3-Instruct-8B --speculative-num-steps 5 \\\n",
|
||||
" --speculative-eagle-topk 8 --speculative-num-draft-tokens 64 --speculative-token-map thunlp/LLaMA3-Instruct-8B-FR-Spec/freq_32768.pt \\\n",
|
||||
" --mem-fraction 0.7 --cuda-graph-max-bs 2 --dtype float16 \n",
|
||||
" --mem-fraction 0.7 --cuda-graph-max-bs 2 --dtype float16 --log-level warning\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
@@ -242,7 +242,7 @@
|
||||
"python3 -m sglang.launch_server --model meta-llama/Llama-3.1-8B-Instruct --speculative-algorithm EAGLE3 \\\n",
|
||||
" --speculative-draft-model-path jamesliu1/sglang-EAGLE3-Llama-3.1-Instruct-8B --speculative-num-steps 5 \\\n",
|
||||
" --speculative-eagle-topk 8 --speculative-num-draft-tokens 32 --mem-fraction 0.6 \\\n",
|
||||
" --cuda-graph-max-bs 2 --dtype float16\n",
|
||||
" --cuda-graph-max-bs 2 --dtype float16 --log-level warning\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
@@ -297,7 +297,7 @@
|
||||
" \"\"\"\n",
|
||||
" python3 -m sglang.launch_server --model-path XiaomiMiMo/MiMo-7B-RL --host 0.0.0.0 --trust-remote-code \\\n",
|
||||
" --speculative-algorithm EAGLE --speculative-num-steps 1 --speculative-eagle-topk 1 --speculative-num-draft-tokens 2 \\\n",
|
||||
" --mem-fraction 0.5\n",
|
||||
" --mem-fraction 0.5 --log-level warning\n",
|
||||
"\"\"\"\n",
|
||||
")\n",
|
||||
"\n",
|
||||
|
||||
Reference in New Issue
Block a user