Support token ids in engine.generate (#1820)

2024-10-27 14:02:34 -07:00
parent c77762d57f
commit 6fcd6d7d6d
3 changed files with 72 additions and 4 deletions
--- a/examples/runtime/engine/input_ids.py
+++ b/examples/runtime/engine/input_ids.py
@@ -0,0 +1,39 @@
+"""
+This example demonstrates how to pass pre-tokenized input ids to the engine instead of a text prompt.
+"""
+
+import sglang as sgl
+from sglang.srt.hf_transformers_utils import get_tokenizer
+
+MODEL_PATH = "meta-llama/Llama-3.1-8B-Instruct"
+
+def main():
+    # Sample prompts.
+    prompts = [
+        "Hello, my name is",
+        "The president of the United States is",
+        "The capital of France is",
+        "The future of AI is",
+    ]
+    # Sampling parameters, passed as a plain dict.
+    sampling_params = {"temperature": 0.8, "top_p": 0.95}
+
+    # Tokenize inputs
+    tokenizer = get_tokenizer(MODEL_PATH)
+    token_ids_list = [tokenizer.encode(prompt) for prompt in prompts]
+    
+    # Create an LLM.
+    # You can also specify `skip_tokenizer_init=True`, but it requires explicit detokenization at the end
+    llm = sgl.Engine(model_path=MODEL_PATH)
+
+    outputs = llm.generate(input_ids=token_ids_list, sampling_params=sampling_params)
+    # Print the outputs.
+    for prompt, output in zip(prompts, outputs):
+        print("===============================")
+        print(f"Prompt: {prompt}\nGenerated Text: {output['text']}")
+
+
+# The __main__ guard is necessary here because we use "spawn" to create subprocesses.
+# Spawn starts a fresh program every time; without the guard, it would loop forever, spawning new processes from sgl.Engine.
+if __name__ == "__main__":
+    main()