29 lines
698 B
Python
29 lines
698 B
Python
|
|
from sglang import function, gen, set_default_backend, Runtime
|
||
|
|
|
||
|
|
|
||
|
|
@function
def few_shot_qa(s, question):
    """Build a few-shot capital-city QA prompt on ``s`` and generate an answer.

    Three worked question/answer pairs are appended to the state first,
    followed by the caller's question and an open ``A:`` slot that is filled
    by a generation call stored under the name ``"answer"``.

    Args:
        s: The program state the prompt text is appended to (supplied by the
            ``@function`` decorator when the program is run).
        question: The question to ask; concatenated into the prompt, so it
            must be a ``str``.
    """
    # Each piece ends with an explicit newline so the prompt is exactly one
    # "Q:" / "A:" entry per line, with no stray characters in between.
    s += (
        "The following are questions with answers.\n"
        "Q: What is the capital of France?\n"
        "A: Paris\n"
        "Q: What is the capital of Germany?\n"
        "A: Berlin\n"
        "Q: What is the capital of Italy?\n"
        "A: Rome\n"
    )
    s += "Q: " + question + "\n"
    # stop="\n" keeps the answer to a single line, matching the examples above.
    s += "A:" + gen("answer", stop="\n", temperature=0)
|
||
|
|
|
||
|
|
|
||
|
|
# Smoke test: run the few-shot QA program against a Llama-2 chat runtime and
# check that the generated answer mentions Washington.
runtime = Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
try:
    set_default_backend(runtime)

    state = few_shot_qa.run(question="What is the capital of the United States?")

    # Normalise whitespace and case so the check tolerates "Washington, D.C."
    # style variations in the generated text.
    answer = state["answer"].strip().lower()
    assert "washington" in answer, f"answer: {state['answer']}"
    print(state.text())
finally:
    # Always release the runtime, even when the run raises or the assertion
    # fails; otherwise shutdown() would be skipped on every failure path.
    runtime.shutdown()
|