Return logprob for choices (#87)
examples/usage/async_io.py (new file, 36 lines)
@@ -0,0 +1,36 @@
+import asyncio
+from sglang import Runtime
+
+
+async def generate(
+    engine,
+    prompt,
+    sampling_params,
+):
+    tokenizer = engine.get_tokenizer()
+
+    messages = [
+        {"role": "system", "content": "You will be given question answer tasks.",},
+        {"role": "user", "content": prompt},
+    ]
+
+    prompt = tokenizer.apply_chat_template(
+        messages, tokenize=False, add_generation_prompt=True
+    )
+
+    stream = engine.add_request(prompt, sampling_params)
+
+    async for output in stream:
+        print(output, end="", flush=True)
+    print()
+
+
+if __name__ == "__main__":
+    runtime = Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
+    print("runtime ready")
+
+    prompt = "Who is Alan Turing?"
+    sampling_params = {"max_new_tokens": 128}
+    asyncio.run(generate(runtime, prompt, sampling_params))
+
+    runtime.shutdown()
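As a usage note, the same streaming loop can accumulate the generated text instead of printing it as it arrives. This is a minimal sketch, not part of the commit: it reuses only the calls that appear in the diff above (Runtime, get_tokenizer is skipped here, add_request), and it assumes, as the example's print loop does, that the stream yields incremental text chunks.

import asyncio

from sglang import Runtime


async def generate_text(engine, prompt, sampling_params) -> str:
    # Collect streamed chunks into one string instead of printing them.
    # Uses only calls shown in the diff above; the incremental-chunk
    # behavior of the stream is assumed from the example's print loop.
    chunks = []
    async for output in engine.add_request(prompt, sampling_params):
        chunks.append(output)
    return "".join(chunks)


if __name__ == "__main__":
    runtime = Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
    text = asyncio.run(
        generate_text(runtime, "Who is Alan Turing?", {"max_new_tokens": 128})
    )
    print(text)
    runtime.shutdown()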