# sglang/examples/frontend_language/quick_start/local_example_complete.py
"""
Usage:
python3 local_example_complete.py
2024-01-30 04:29:32 -08:00
"""
2024-07-18 04:55:39 +10:00
2024-01-30 04:29:32 -08:00
import sglang as sgl
@sgl.function
def few_shot_qa(s, question):
    """Build a few-shot QA prompt and generate an answer.

    Appends three capital-city examples, then the user's question, and asks
    the backend to complete the answer (stored under the key "answer").
    Generation stops at the first newline; temperature=0 makes it greedy.
    """
    s += """The following are questions with answers.
Q: What is the capital of France?
A: Paris
Q: What is the capital of Germany?
A: Berlin
Q: What is the capital of Italy?
A: Rome
"""
    s += "Q: " + question + "\n"
    s += "A:" + sgl.gen("answer", stop="\n", temperature=0)
2024-01-30 04:29:32 -08:00
def single():
    """Run one synchronous request and sanity-check the answer.

    Asserts the model's answer mentions "washington", then prints the full
    prompt + completion text.
    """
    state = few_shot_qa.run(question="What is the capital of the United States?")
    answer = state["answer"].strip().lower()

    assert "washington" in answer, f"answer: {state['answer']}"

    print(state.text())
2024-01-30 04:29:32 -08:00
def stream():
    """Run one request in streaming mode, printing tokens as they arrive.

    Iterates only over the "answer" segment of the generation and flushes
    each chunk immediately so output appears incrementally.
    """
    state = few_shot_qa.run(
        question="What is the capital of the United States?", stream=True
    )
    for out in state.text_iter("answer"):
        print(out, end="", flush=True)
    print()
def batch():
    """Run two requests as a batch and print each answer.

    run_batch takes a list of keyword-argument dicts, one per request, and
    returns the finished states in the same order.
    """
    states = few_shot_qa.run_batch(
        [
            {"question": "What is the capital of the United States?"},
            {"question": "What is the capital of China?"},
        ]
    )
    for s in states:
        print(s["answer"])
if __name__ == "__main__":
runtime = sgl.Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
sgl.set_default_backend(runtime)
# Run a single request
print("\n========== single ==========\n")
single()
# Stream output
print("\n========== stream ==========\n")
stream()
# Run a batch of requests
print("\n========== batch ==========\n")
batch()
runtime.shutdown()