smaller and non-gated models for docs (#5378)

simveit
2025-04-21 02:38:25 +02:00
committed by GitHub
parent fac17acf08
commit 8de53da989
6 changed files with 51 additions and 44 deletions


@@ -29,7 +29,7 @@
"\n",
"Launch the server in your terminal and wait for it to initialize.\n",
"\n",
"**Remember to add** `--chat-template llama_3_vision` **to specify the [vision chat template](https://docs.sglang.ai/backend/openai_api_vision.html#Chat-Template), otherwise, the server will only support text (images wont be passed in), which can lead to degraded performance.**\n",
"**Remember to add** `--chat-template` **for example** `--chat-template=qwen2-vl` **to specify the [vision chat template](https://docs.sglang.ai/backend/openai_api_vision.html#Chat-Template), otherwise, the server will only support text (images wont be passed in), which can lead to degraded performance.**\n",
"\n",
"We need to specify `--chat-template` for vision language models because the chat template provided in Hugging Face tokenizer only supports text."
]
@@ -51,8 +51,8 @@
"\n",
"vision_process, port = launch_server_cmd(\n",
" \"\"\"\n",
"python3 -m sglang.launch_server --model-path meta-llama/Llama-3.2-11B-Vision-Instruct \\\n",
" --chat-template=llama_3_vision\n",
"python3 -m sglang.launch_server --model-path Qwen/Qwen2.5-VL-7B-Instruct \\\n",
" --chat-template=qwen2-vl\n",
"\"\"\"\n",
")\n",
"\n",
@@ -79,7 +79,7 @@
"curl_command = f\"\"\"\n",
"curl -s http://localhost:{port}/v1/chat/completions \\\\\n",
" -d '{{\n",
" \"model\": \"meta-llama/Llama-3.2-11B-Vision-Instruct\",\n",
" \"model\": \"Qwen/Qwen2.5-VL-7B-Instruct\",\n",
" \"messages\": [\n",
" {{\n",
" \"role\": \"user\",\n",
@@ -127,7 +127,7 @@
"url = f\"http://localhost:{port}/v1/chat/completions\"\n",
"\n",
"data = {\n",
" \"model\": \"meta-llama/Llama-3.2-11B-Vision-Instruct\",\n",
" \"model\": \"Qwen/Qwen2.5-VL-7B-Instruct\",\n",
" \"messages\": [\n",
" {\n",
" \"role\": \"user\",\n",
@@ -167,7 +167,7 @@
"client = OpenAI(base_url=f\"http://localhost:{port}/v1\", api_key=\"None\")\n",
"\n",
"response = client.chat.completions.create(\n",
" model=\"meta-llama/Llama-3.2-11B-Vision-Instruct\",\n",
" model=\"Qwen/Qwen2.5-VL-7B-Instruct\",\n",
" messages=[\n",
" {\n",
" \"role\": \"user\",\n",
@@ -211,7 +211,7 @@
"client = OpenAI(base_url=f\"http://localhost:{port}/v1\", api_key=\"None\")\n",
"\n",
"response = client.chat.completions.create(\n",
" model=\"meta-llama/Llama-3.2-11B-Vision-Instruct\",\n",
" model=\"Qwen/Qwen2.5-VL-7B-Instruct\",\n",
" messages=[\n",
" {\n",
" \"role\": \"user\",\n",