import sglang as sgl


def main():
    """Run offline embedding generation for a small batch of prompts.

    Launches an SGLang engine directly (no HTTP server) in embedding mode
    and prints the embedding vector produced for each prompt.
    """
    # Prompts whose embeddings we want to compute.
    batch = [
        "Hello, my name is",
        "The president of the United States is",
        "The capital of France is",
        "The future of AI is",
    ]

    # Spin up the engine in embedding mode.
    engine = sgl.Engine(
        model_path="Alibaba-NLP/gte-Qwen2-1.5B-instruct", is_embedding=True
    )

    # encode() returns one result dict per prompt, in the same order as the input.
    results = engine.encode(batch)
    for text, result in zip(batch, results):
        print("===============================")
        print(f"Prompt: {text}\nEmbedding vector: {result['embedding']}")


# The __main__ guard is required because sgl.Engine creates its subprocesses
# with the "spawn" start method: spawn starts a fresh interpreter that
# re-imports this module, so without the guard every child would try to build
# its own engine and keep spawning processes forever.
if __name__ == "__main__":
    main()
[Embedding Generation](./embedding.py) + +In this example, we launch an SGLang engine and feed a batch of inputs for embedding generation. + +### 3. [Custom Server](./custom_server.py) This example demonstrates how to create a custom server on top of the SGLang Engine. We use [Sanic](https://sanic.dev/en/) as an example. The server supports both non-streaming and streaming endpoints.