Docs: Implemented frontend docs (#3791)

Co-authored-by: Chayenne <zhaochen20@outlook.com>
2025-02-27 00:30:05 +01:00
parent 7c1692aa90
commit acd1a15921
8 changed files with 599 additions and 328 deletions
--- a/docs/backend/offline_engine_api.ipynb
+++ b/docs/backend/offline_engine_api.ipynb
@@ -23,6 +23,17 @@
    "Additionally, you can easily build a custom server on top of the SGLang offline engine. A detailed example working in a python script can be found in [custom_server](https://github.com/sgl-project/sglang/blob/main/examples/runtime/engine/custom_server.py)."
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Advanced Usage\n",
+    "\n",
+    "The engine supports [VLM inference](https://github.com/sgl-project/sglang/blob/main/examples/runtime/engine/offline_batch_inference_vlm.py) as well as [extracting hidden states](https://github.com/sgl-project/sglang/blob/main/examples/runtime/engine/hidden_states.py).\n",
+    "\n",
+    "Please see [the examples](https://github.com/sgl-project/sglang/tree/main/examples/runtime/engine) for further use cases."
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
@@ -39,14 +50,22 @@
   "outputs": [],
   "source": [
    "# launch the offline engine\n",
-    "from sglang.utils import stream_and_merge, async_stream_and_merge\n",
-    "import sglang as sgl\n",
    "import asyncio\n",
+    "import io\n",
+    "import os\n",
+    "\n",
+    "from PIL import Image\n",
+    "import requests\n",
+    "import sglang as sgl\n",
+    "\n",
+    "from sglang.srt.conversation import chat_templates\n",
    "from sglang.test.test_utils import is_in_ci\n",
+    "from sglang.utils import async_stream_and_merge, stream_and_merge\n",
    "\n",
    "if is_in_ci():\n",
    "    import patch\n",
    "\n",
+    "\n",
    "llm = sgl.Engine(model_path=\"meta-llama/Meta-Llama-3.1-8B-Instruct\")"
   ]
  },
@@ -185,57 +204,6 @@
    "asyncio.run(main())"
   ]
  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "llm.shutdown()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Return Hidden States"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "llm = sgl.Engine(\n",
-    "    model_path=\"meta-llama/Meta-Llama-3.1-8B-Instruct\", return_hidden_states=True\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "prompts = [\n",
-    "    \"Hello, my name is\",\n",
-    "    \"The president of the United States is\",\n",
-    "    \"The capital of France is\",\n",
-    "    \"The future of AI is\",\n",
-    "]\n",
-    "\n",
-    "sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95, \"max_new_tokens\": 10}\n",
-    "\n",
-    "outputs = llm.generate(prompts, sampling_params=sampling_params)\n",
-    "for prompt, output in zip(prompts, outputs):\n",
-    "    print(\"===============================\")\n",
-    "    print(\n",
-    "        f\"Prompt: {prompt}\\nGenerated text: {output['text']}\\nPrompt_Tokens: {output['meta_info']['prompt_tokens']}\\tCompletion_tokens: {output['meta_info']['completion_tokens']}\\nHidden states: {[i.shape for i in output['meta_info']['hidden_states']]}\"\n",
-    "    )\n",
-    "    print()"
-   ]
-  },
  {
   "cell_type": "code",
   "execution_count": null,