Docs fix about EAGLE and streaming output (#3166)

Co-authored-by: Chayenne <zhaochenyang@ucla.edu>
Co-authored-by: Chayenne <zhaochen20@outlook.com>
Co-authored-by: Jhin <jhinpan@umich.edu>
This commit is contained in:
Jhin
2025-01-27 20:10:45 -06:00
committed by GitHub
parent 08104b56de
commit 7b9b4f4426
6 changed files with 91 additions and 29 deletions

View File

@@ -37,7 +37,7 @@
"outputs": [],
"source": [
"# launch the offline engine\n",
"\n",
"from sglang.utils import stream_and_merge, async_stream_and_merge\n",
"import sglang as sgl\n",
"import asyncio\n",
"\n",
@@ -86,20 +86,22 @@
"outputs": [],
"source": [
"prompts = [\n",
" \"Hello, my name is\",\n",
" \"The capital of France is\",\n",
" \"The future of AI is\",\n",
" \"Write a short, neutral self-introduction for a fictional character. Hello, my name is\",\n",
"    \"Provide a concise factual statement about France's capital city. The capital of France is\",\n",
" \"Explain possible future trends in artificial intelligence. The future of AI is\",\n",
"]\n",
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
"\n",
"print(\"\\n=== Testing synchronous streaming generation ===\")\n",
"sampling_params = {\n",
" \"temperature\": 0.2,\n",
" \"top_p\": 0.9,\n",
"}\n",
"\n",
"print(\"\\n=== Testing synchronous streaming generation with overlap removal ===\\n\")\n",
"\n",
"for prompt in prompts:\n",
" print(f\"\\nPrompt: {prompt}\")\n",
" print(\"Generated text: \", end=\"\", flush=True)\n",
"\n",
" for chunk in llm.generate(prompt, sampling_params, stream=True):\n",
" print(chunk[\"text\"], end=\"\", flush=True)\n",
" print(f\"Prompt: {prompt}\")\n",
" merged_output = stream_and_merge(llm, prompt, sampling_params)\n",
" print(\"Generated text:\", merged_output)\n",
" print()"
]
},
@@ -117,9 +119,9 @@
"outputs": [],
"source": [
"prompts = [\n",
" \"Hello, my name is\",\n",
" \"The capital of France is\",\n",
" \"The future of AI is\",\n",
" \"Write a short, neutral self-introduction for a fictional character. Hello, my name is\",\n",
"    \"Provide a concise factual statement about France's capital city. The capital of France is\",\n",
" \"Explain possible future trends in artificial intelligence. The future of AI is\",\n",
"]\n",
"\n",
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
@@ -152,13 +154,14 @@
"outputs": [],
"source": [
"prompts = [\n",
" \"Hello, my name is\",\n",
" \"The capital of France is\",\n",
" \"The future of AI is\",\n",
" \"Write a short, neutral self-introduction for a fictional character. Hello, my name is\",\n",
"    \"Provide a concise factual statement about France's capital city. The capital of France is\",\n",
" \"Explain possible future trends in artificial intelligence. The future of AI is\",\n",
"]\n",
"\n",
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
"\n",
"print(\"\\n=== Testing asynchronous streaming generation ===\")\n",
"print(\"\\n=== Testing asynchronous streaming generation (no repeats) ===\")\n",
"\n",
"\n",
"async def main():\n",
@@ -166,10 +169,11 @@
" print(f\"\\nPrompt: {prompt}\")\n",
" print(\"Generated text: \", end=\"\", flush=True)\n",
"\n",
" generator = await llm.async_generate(prompt, sampling_params, stream=True)\n",
" async for chunk in generator:\n",
" print(chunk[\"text\"], end=\"\", flush=True)\n",
" print()\n",
" # Replace direct calls to async_generate with our custom overlap-aware version\n",
" async for cleaned_chunk in async_stream_and_merge(llm, prompt, sampling_params):\n",
" print(cleaned_chunk, end=\"\", flush=True)\n",
"\n",
" print() # New line after each prompt\n",
"\n",
"\n",
"asyncio.run(main())"