Fix docs (#2164)
This commit is contained in:
20
README.md
20
README.md
@@ -41,15 +41,10 @@ The core features include:
|
|||||||
- **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
|
- **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
|
||||||
|
|
||||||
## Getting Started
|
## Getting Started
|
||||||
Install SGLang: See [https://sgl-project.github.io/start/install.html](https://sgl-project.github.io/start/install.html)
|
- [Install SGLang](https://sgl-project.github.io/start/install.html)
|
||||||
|
- [Send requests](https://sgl-project.github.io/start/send_request.html)
|
||||||
Send requests: See [https://sgl-project.github.io/start/send_request.html](https://sgl-project.github.io/start/send_request.html)
|
- [Backend: SGLang Runtime (SRT)](https://sgl-project.github.io/backend/backend.html)
|
||||||
|
- [Frontend: Structured Generation Language (SGLang)](https://sgl-project.github.io/frontend/frontend.html)
|
||||||
## Backend: SGLang Runtime (SRT)
|
|
||||||
See [https://sgl-project.github.io/backend/backend.html](https://sgl-project.github.io/backend/backend.html)
|
|
||||||
|
|
||||||
## Frontend: Structured Generation Language (SGLang)
|
|
||||||
See [https://sgl-project.github.io/frontend/frontend.html](https://sgl-project.github.io/frontend/frontend.html)
|
|
||||||
|
|
||||||
## Benchmark And Performance
|
## Benchmark And Performance
|
||||||
Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3 blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)
|
Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-sglang-llama3/), [v0.3 blog](https://lmsys.org/blog/2024-09-04-sglang-v0-3/)
|
||||||
@@ -57,6 +52,9 @@ Learn more in our release blogs: [v0.2 blog](https://lmsys.org/blog/2024-07-25-s
|
|||||||
## Roadmap
|
## Roadmap
|
||||||
[Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
|
[Development Roadmap (2024 Q4)](https://github.com/sgl-project/sglang/issues/1487)
|
||||||
|
|
||||||
## Citation And Acknowledgment
|
## Adoption and Sponsorship
|
||||||
|
The project is supported by (alphabetically): AMD, Baseten, Etched, Hyperbolic, Jam & Tea Studios, LinkedIn, NVIDIA, RunPod, Stanford, UC Berkeley, and xAI.
|
||||||
|
|
||||||
|
## Acknowledgment and Citation
|
||||||
|
We learned from the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
|
||||||
Please cite our paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
|
Please cite our paper, [SGLang: Efficient Execution of Structured Language Model Programs](https://arxiv.org/abs/2312.07104), if you find the project useful.
|
||||||
We also learned from the design and reused code from the following projects: [Guidance](https://github.com/guidance-ai/guidance), [vLLM](https://github.com/vllm-project/vllm), [LightLLM](https://github.com/ModelTC/lightllm), [FlashInfer](https://github.com/flashinfer-ai/flashinfer), [Outlines](https://github.com/outlines-dev/outlines), and [LMQL](https://github.com/eth-sri/lmql).
|
|
||||||
|
|||||||
@@ -39,7 +39,6 @@
|
|||||||
"# launch the offline engine\n",
|
"# launch the offline engine\n",
|
||||||
"\n",
|
"\n",
|
||||||
"import sglang as sgl\n",
|
"import sglang as sgl\n",
|
||||||
"from sglang.utils import print_highlight\n",
|
|
||||||
"import asyncio\n",
|
"import asyncio\n",
|
||||||
"\n",
|
"\n",
|
||||||
"llm = sgl.Engine(model_path=\"meta-llama/Meta-Llama-3.1-8B-Instruct\")"
|
"llm = sgl.Engine(model_path=\"meta-llama/Meta-Llama-3.1-8B-Instruct\")"
|
||||||
@@ -69,8 +68,8 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"outputs = llm.generate(prompts, sampling_params)\n",
|
"outputs = llm.generate(prompts, sampling_params)\n",
|
||||||
"for prompt, output in zip(prompts, outputs):\n",
|
"for prompt, output in zip(prompts, outputs):\n",
|
||||||
" print_highlight(\"===============================\")\n",
|
" print(\"===============================\")\n",
|
||||||
" print_highlight(f\"Prompt: {prompt}\\nGenerated text: {output['text']}\")"
|
" print(f\"Prompt: {prompt}\\nGenerated text: {output['text']}\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@@ -93,10 +92,10 @@
|
|||||||
"]\n",
|
"]\n",
|
||||||
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
|
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print_highlight(\"\\n=== Testing synchronous streaming generation ===\")\n",
|
"print(\"\\n=== Testing synchronous streaming generation ===\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"for prompt in prompts:\n",
|
"for prompt in prompts:\n",
|
||||||
" print_highlight(f\"\\nPrompt: {prompt}\")\n",
|
" print(f\"\\nPrompt: {prompt}\")\n",
|
||||||
" print(\"Generated text: \", end=\"\", flush=True)\n",
|
" print(\"Generated text: \", end=\"\", flush=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" for chunk in llm.generate(prompt, sampling_params, stream=True):\n",
|
" for chunk in llm.generate(prompt, sampling_params, stream=True):\n",
|
||||||
@@ -125,15 +124,15 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
|
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print_highlight(\"\\n=== Testing asynchronous batch generation ===\")\n",
|
"print(\"\\n=== Testing asynchronous batch generation ===\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"async def main():\n",
|
"async def main():\n",
|
||||||
" outputs = await llm.async_generate(prompts, sampling_params)\n",
|
" outputs = await llm.async_generate(prompts, sampling_params)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" for prompt, output in zip(prompts, outputs):\n",
|
" for prompt, output in zip(prompts, outputs):\n",
|
||||||
" print_highlight(f\"\\nPrompt: {prompt}\")\n",
|
" print(f\"\\nPrompt: {prompt}\")\n",
|
||||||
" print_highlight(f\"Generated text: {output['text']}\")\n",
|
" print(f\"Generated text: {output['text']}\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"asyncio.run(main())"
|
"asyncio.run(main())"
|
||||||
@@ -159,12 +158,12 @@
|
|||||||
"]\n",
|
"]\n",
|
||||||
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
|
"sampling_params = {\"temperature\": 0.8, \"top_p\": 0.95}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"print_highlight(\"\\n=== Testing asynchronous streaming generation ===\")\n",
|
"print(\"\\n=== Testing asynchronous streaming generation ===\")\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"async def main():\n",
|
"async def main():\n",
|
||||||
" for prompt in prompts:\n",
|
" for prompt in prompts:\n",
|
||||||
" print_highlight(f\"\\nPrompt: {prompt}\")\n",
|
" print(f\"\\nPrompt: {prompt}\")\n",
|
||||||
" print(\"Generated text: \", end=\"\", flush=True)\n",
|
" print(\"Generated text: \", end=\"\", flush=True)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" generator = await llm.async_generate(prompt, sampling_params, stream=True)\n",
|
" generator = await llm.async_generate(prompt, sampling_params, stream=True)\n",
|
||||||
|
|||||||
Reference in New Issue
Block a user