From cd0be7489f3ad0f12ca21db23962c0dd52788262 Mon Sep 17 00:00:00 2001 From: Byron Hsu Date: Mon, 14 Oct 2024 19:56:21 -0700 Subject: [PATCH] [doc] improve engine doc and add to readme (#1670) --- README.md | 34 ++++++++++++++++++++++++++++++++++ docs/en/backend.md | 31 ++++++++++++++++++++++++++++--- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index a15825b68..707a754be 100644 --- a/README.md +++ b/README.md @@ -241,6 +241,40 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1 ``` +### Engine Without HTTP Server + +We also provide an inference engine **without an HTTP server**. For example, + +```python +import sglang as sgl + + +def main(): + prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + ] + sampling_params = {"temperature": 0.8, "top_p": 0.95} + llm = sgl.Engine(model_path="meta-llama/Meta-Llama-3.1-8B-Instruct") + + outputs = llm.generate(prompts, sampling_params) + for prompt, output in zip(prompts, outputs): + print("===============================") + print(f"Prompt: {prompt}\nGenerated text: {output['text']}") + +if __name__ == "__main__": + main() +``` + +This can be used for: + +1. **Offline Batch Inference** +2. 
**Building Custom Servers** + +You can view the full example [here](https://github.com/sgl-project/sglang/tree/main/examples/runtime/engine) + ### Supported Models **Generative Models** diff --git a/docs/en/backend.md b/docs/en/backend.md index d19eb062a..516ab2af0 100644 --- a/docs/en/backend.md +++ b/docs/en/backend.md @@ -93,14 +93,39 @@ python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct python -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --tp 4 --nccl-init sgl-dev-0:50000 --nnodes 2 --node-rank 1 ``` -### SRT Engine: Direct Inference Without HTTP +### Engine Without HTTP Server -SGLang provides a direct inference engine **without an HTTP server**. This can be used for: +We also provide an inference engine **without an HTTP server**. For example, + +```python +import sglang as sgl + + +def main(): + prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", + ] + sampling_params = {"temperature": 0.8, "top_p": 0.95} + llm = sgl.Engine(model_path="meta-llama/Meta-Llama-3.1-8B-Instruct") + + outputs = llm.generate(prompts, sampling_params) + for prompt, output in zip(prompts, outputs): + print("===============================") + print(f"Prompt: {prompt}\nGenerated text: {output['text']}") + +if __name__ == "__main__": + main() +``` + +This can be used for: 1. **Offline Batch Inference** 2. **Building Custom Servers** -We provide usage examples [here](https://github.com/sgl-project/sglang/tree/main/examples/runtime/engine) +You can view the full example [here](https://github.com/sgl-project/sglang/tree/main/examples/runtime/engine) ### Supported Models