Docs fix about EAGLE and streaming output (#3166)
Co-authored-by: Chayenne <zhaochenyang@ucla.edu> Co-authored-by: Chayenne <zhaochen20@outlook.com> Co-authored-by: Jhin <jhinpan@umich.edu>
This commit is contained in:
@@ -37,7 +37,7 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# launch the offline engine\n",
|
||||
"\n",
|
||||
"from sglang.utils import stream_and_merge, async_stream_and_merge\n",
|
||||
"import sglang as sgl\n",
|
||||
"import asyncio\n",
|
||||
"\n",
|
||||
@@ -86,20 +86,22 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompts = [\n",
|
||||
" \"Hello, my name is\",\n",
|
||||
" \"The capital of France is\",\n",
|
||||
" \"The future of AI is\",\n",
|
||||
" \"Write a short, neutral self-introduction for a fictional character. Hello, my name is\",\n",
|
||||
" \"Provide a concise factual statement about France’s capital city. The capital of France is\",\n",
|
||||
" \"Explain possible future trends in artificial intelligence. The future of AI is\",\n",
|
||||
"]\n",
|
||||
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
|
||||
"\n",
|
||||
"print(\"\\n=== Testing synchronous streaming generation ===\")\n",
|
||||
"sampling_params = {\n",
|
||||
" \"temperature\": 0.2,\n",
|
||||
" \"top_p\": 0.9,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"print(\"\\n=== Testing synchronous streaming generation with overlap removal ===\\n\")\n",
|
||||
"\n",
|
||||
"for prompt in prompts:\n",
|
||||
" print(f\"\\nPrompt: {prompt}\")\n",
|
||||
" print(\"Generated text: \", end=\"\", flush=True)\n",
|
||||
"\n",
|
||||
" for chunk in llm.generate(prompt, sampling_params, stream=True):\n",
|
||||
" print(chunk[\"text\"], end=\"\", flush=True)\n",
|
||||
" print(f\"Prompt: {prompt}\")\n",
|
||||
" merged_output = stream_and_merge(llm, prompt, sampling_params)\n",
|
||||
" print(\"Generated text:\", merged_output)\n",
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
@@ -117,9 +119,9 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompts = [\n",
|
||||
" \"Hello, my name is\",\n",
|
||||
" \"The capital of France is\",\n",
|
||||
" \"The future of AI is\",\n",
|
||||
" \"Write a short, neutral self-introduction for a fictional character. Hello, my name is\",\n",
|
||||
" \"Provide a concise factual statement about France’s capital city. The capital of France is\",\n",
|
||||
" \"Explain possible future trends in artificial intelligence. The future of AI is\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
|
||||
@@ -152,13 +154,14 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"prompts = [\n",
|
||||
" \"Hello, my name is\",\n",
|
||||
" \"The capital of France is\",\n",
|
||||
" \"The future of AI is\",\n",
|
||||
" \"Write a short, neutral self-introduction for a fictional character. Hello, my name is\",\n",
|
||||
" \"Provide a concise factual statement about France’s capital city. The capital of France is\",\n",
|
||||
" \"Explain possible future trends in artificial intelligence. The future of AI is\",\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
|
||||
"\n",
|
||||
"print(\"\\n=== Testing asynchronous streaming generation ===\")\n",
|
||||
"print(\"\\n=== Testing asynchronous streaming generation (no repeats) ===\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"async def main():\n",
|
||||
@@ -166,10 +169,11 @@
|
||||
" print(f\"\\nPrompt: {prompt}\")\n",
|
||||
" print(\"Generated text: \", end=\"\", flush=True)\n",
|
||||
"\n",
|
||||
" generator = await llm.async_generate(prompt, sampling_params, stream=True)\n",
|
||||
" async for chunk in generator:\n",
|
||||
" print(chunk[\"text\"], end=\"\", flush=True)\n",
|
||||
" print()\n",
|
||||
" # Replace direct calls to async_generate with our custom overlap-aware version\n",
|
||||
" async for cleaned_chunk in async_stream_and_merge(llm, prompt, sampling_params):\n",
|
||||
" print(cleaned_chunk, end=\"\", flush=True)\n",
|
||||
"\n",
|
||||
" print() # New line after each prompt\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"asyncio.run(main())"
|
||||
|
||||
Reference in New Issue
Block a user