Use smaller, non-gated models for docs (#5378)

2025-04-21 02:38:25 +02:00
parent fac17acf08
commit 8de53da989
6 changed files with 51 additions and 44 deletions
--- a/docs/backend/openai_api_completions.ipynb
+++ b/docs/backend/openai_api_completions.ipynb
@@ -44,7 +44,7 @@
    "\n",
    "\n",
    "server_process, port = launch_server_cmd(\n",
-    "    \"python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3.1-8B-Instruct --host 0.0.0.0\"\n",
+    "    \"python3 -m sglang.launch_server --model-path qwen/qwen2.5-0.5b-instruct --host 0.0.0.0 --mem-fraction-static 0.8\"\n",
    ")\n",
    "\n",
    "wait_for_server(f\"http://localhost:{port}\")\n",
@@ -75,7 +75,7 @@
    "client = openai.Client(base_url=f\"http://127.0.0.1:{port}/v1\", api_key=\"None\")\n",
    "\n",
    "response = client.chat.completions.create(\n",
-    "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "    model=\"qwen/qwen2.5-0.5b-instruct\",\n",
    "    messages=[\n",
    "        {\"role\": \"user\", \"content\": \"List 3 countries and their capitals.\"},\n",
    "    ],\n",
@@ -104,7 +104,7 @@
   "outputs": [],
   "source": [
    "response = client.chat.completions.create(\n",
-    "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "    model=\"qwen/qwen2.5-0.5b-instruct\",\n",
    "    messages=[\n",
    "        {\n",
    "            \"role\": \"system\",\n",
@@ -143,7 +143,7 @@
   "outputs": [],
   "source": [
    "stream = client.chat.completions.create(\n",
-    "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "    model=\"qwen/qwen2.5-0.5b-instruct\",\n",
    "    messages=[{\"role\": \"user\", \"content\": \"Say this is a test\"}],\n",
    "    stream=True,\n",
    ")\n",
@@ -169,7 +169,7 @@
   "outputs": [],
   "source": [
    "response = client.completions.create(\n",
-    "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "    model=\"qwen/qwen2.5-0.5b-instruct\",\n",
    "    prompt=\"List 3 countries and their capitals.\",\n",
    "    temperature=0,\n",
    "    max_tokens=64,\n",
@@ -198,7 +198,7 @@
   "outputs": [],
   "source": [
    "response = client.completions.create(\n",
-    "    model=\"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "    model=\"qwen/qwen2.5-0.5b-instruct\",\n",
    "    prompt=\"Write a short story about a space explorer.\",\n",
    "    temperature=0.7,  # Moderate temperature for creative writing\n",
    "    max_tokens=150,  # Longer response for a story\n",
@@ -257,7 +257,7 @@
    "        \"method\": \"POST\",\n",
    "        \"url\": \"/chat/completions\",\n",
    "        \"body\": {\n",
-    "            \"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "            \"model\": \"qwen/qwen2.5-0.5b-instruct\",\n",
    "            \"messages\": [\n",
    "                {\"role\": \"user\", \"content\": \"Tell me a joke about programming\"}\n",
    "            ],\n",
@@ -269,7 +269,7 @@
    "        \"method\": \"POST\",\n",
    "        \"url\": \"/chat/completions\",\n",
    "        \"body\": {\n",
-    "            \"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "            \"model\": \"qwen/qwen2.5-0.5b-instruct\",\n",
    "            \"messages\": [{\"role\": \"user\", \"content\": \"What is Python?\"}],\n",
    "            \"max_tokens\": 50,\n",
    "        },\n",
@@ -362,7 +362,7 @@
    "            \"method\": \"POST\",\n",
    "            \"url\": \"/chat/completions\",\n",
    "            \"body\": {\n",
-    "                \"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "                \"model\": \"qwen/qwen2.5-0.5b-instruct\",\n",
    "                \"messages\": [\n",
    "                    {\n",
    "                        \"role\": \"system\",\n",
@@ -439,7 +439,7 @@
    "            \"method\": \"POST\",\n",
    "            \"url\": \"/chat/completions\",\n",
    "            \"body\": {\n",
-    "                \"model\": \"meta-llama/Meta-Llama-3.1-8B-Instruct\",\n",
+    "                \"model\": \"qwen/qwen2.5-0.5b-instruct\",\n",
    "                \"messages\": [\n",
    "                    {\n",
    "                        \"role\": \"system\",\n",