Update quick start examples (#120)

2024-01-30 04:29:32 -08:00
parent 4ea92f8307
commit 0617528632
20 changed files with 567 additions and 237 deletions
--- a/examples/quick_start/anthropic_example_chat.py
+++ b/examples/quick_start/anthropic_example_chat.py
@@ -1,19 +1,67 @@
-from sglang import function, system, user, assistant, gen, set_default_backend, Anthropic
+"""
+Usage:
+export ANTHROPIC_API_KEY=sk-******
+python3 anthropic_example_chat.py
+"""
+import sglang as sgl


-@function
+@sgl.function
 def multi_turn_question(s, question_1, question_2):
-    s += user(question_1)
-    s += assistant(gen("answer_1", max_tokens=256))
-    s += user(question_2)
-    s += assistant(gen("answer_2", max_tokens=256))
+    s += sgl.user(question_1)
+    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
+    s += sgl.user(question_2)
+    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))

-set_default_backend(Anthropic("claude-2"))

-state = multi_turn_question.run(
-    question_1="What is the capital of the United States?",
-    question_2="List two local attractions.",
-)
+def single():
+    state = multi_turn_question.run(
+        question_1="What is the capital of the United States?",
+        question_2="List two local attractions.",
+    )

-for m in state.messages():
-    print(m["role"], ":", m["content"])
+    for m in state.messages():
+        print(m["role"], ":", m["content"])
+
+    print("answer_1", state["answer_1"])
+
+
+def stream():
+    state = multi_turn_question.run(
+        question_1="What is the capital of the United States?",
+        question_2="List two local attractions.",
+        stream=True
+    )
+
+    for out in state.text_iter():
+        print(out, end="", flush=True)
+    print()
+
+
+def batch():
+    states = multi_turn_question.run_batch([
+        {"question_1": "What is the capital of the United States?",
+         "question_2": "List two local attractions."},
+
+        {"question_1": "What is the capital of France?",
+         "question_2": "What is the population of this city?"},
+    ])
+
+    for s in states:
+        print(s.messages())
+
+
+if __name__ == "__main__":
+    sgl.set_default_backend(sgl.Anthropic("claude-2"))
+
+    # Run a single request
+    print("\n========== single ==========\n")
+    single()
+
+    # Stream output
+    print("\n========== stream ==========\n")
+    stream()
+
+    # Run a batch of requests
+    print("\n========== batch ==========\n")
+    batch()
--- a/examples/quick_start/anthropic_example_complete.py
+++ b/examples/quick_start/anthropic_example_complete.py
@@ -1,7 +1,13 @@
-from sglang import function, gen, set_default_backend, Anthropic
+"""
+Usage:
+export ANTHROPIC_API_KEY=sk-******
+python3 anthropic_example_complete.py
+"""
+
+import sglang as sgl


-@function
+@sgl.function
 def few_shot_qa(s, question):
    s += (
 """
@@ -13,14 +19,49 @@ def few_shot_qa(s, question):
 \n\nAssistant: Rome
 """)
    s += "\n\nHuman: " + question + "\n"
-    s += "\n\nAssistant:" + gen("answer", stop="\n", temperature=0)
+    s += "\n\nAssistant:" + sgl.gen("answer", stop="\n", temperature=0)


-set_default_backend(Anthropic("claude-2"))
+def single():
+    state = few_shot_qa.run(question="What is the capital of the United States?")
+    answer = state["answer"].strip().lower()

-state = few_shot_qa.run(question="What is the capital of the United States?")
-answer = state["answer"].strip().lower()
+    assert "washington" in answer, f"answer: {state['answer']}"

-assert "washington" in answer, f"answer: {state['answer']}"
+    print(state.text())

-print(state.text())
+
+def stream():
+    state = few_shot_qa.run(
+        question="What is the capital of the United States?",
+        stream=True)
+
+    for out in state.text_iter("answer"):
+        print(out, end="", flush=True)
+    print()
+
+
+def batch():
+    states = few_shot_qa.run_batch([
+        {"question": "What is the capital of the United States?"},
+        {"question": "What is the capital of China?"},
+    ])
+
+    for s in states:
+        print(s["answer"])
+
+
+if __name__ == "__main__":
+    sgl.set_default_backend(sgl.Anthropic("claude-2"))
+
+    # Run a single request
+    print("\n========== single ==========\n")
+    single()
+
+    # Stream output
+    print("\n========== stream ==========\n")
+    stream()
+
+    # Run a batch of requests
+    print("\n========== batch ==========\n")
+    batch()
--- a/examples/quick_start/anthropic_example_stream.py
+++ b/examples/quick_start/anthropic_example_stream.py
@@ -1,20 +0,0 @@
-from sglang import function, system, user, assistant, gen, set_default_backend, Anthropic
-
-
-@function
-def multi_turn_question(s, question_1, question_2):
-    s += user(question_1)
-    s += assistant(gen("answer_1", max_tokens=256))
-    s += user(question_2)
-    s += assistant(gen("answer_2", max_tokens=256))
-
-set_default_backend(Anthropic("claude-2"))
-
-state = multi_turn_question.run(
-    question_1="What is the capital of the United States?",
-    question_2="List two local attractions.",
-    stream=True
-)
-
-for out in state.text_iter():
-    print(out, end="", flush=True)
--- a/examples/quick_start/gemini_example_chat.py
+++ b/examples/quick_start/gemini_example_chat.py
@@ -0,0 +1,67 @@
+"""
+Usage:
+export GCP_PROJECT_ID=******
+python3 gemini_example_chat.py
+"""
+import sglang as sgl
+
+
+@sgl.function
+def multi_turn_question(s, question_1, question_2):
+    s += sgl.user(question_1)
+    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
+    s += sgl.user(question_2)
+    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))
+
+
+def single():
+    state = multi_turn_question.run(
+        question_1="What is the capital of the United States?",
+        question_2="List two local attractions.",
+    )
+
+    for m in state.messages():
+        print(m["role"], ":", m["content"])
+
+    print("answer_1", state["answer_1"])
+
+
+def stream():
+    state = multi_turn_question.run(
+        question_1="What is the capital of the United States?",
+        question_2="List two local attractions.",
+        stream=True
+    )
+
+    for out in state.text_iter():
+        print(out, end="", flush=True)
+    print()
+
+
+def batch():
+    states = multi_turn_question.run_batch([
+        {"question_1": "What is the capital of the United States?",
+         "question_2": "List two local attractions."},
+
+        {"question_1": "What is the capital of France?",
+         "question_2": "What is the population of this city?"},
+    ])
+
+    for s in states:
+        print(s.messages())
+
+
+if __name__ == "__main__":
+    sgl.set_default_backend(sgl.VertexAI("gemini-pro"))
+
+    # Run a single request
+    print("\n========== single ==========\n")
+    single()
+
+    # Stream output
+    print("\n========== stream ==========\n")
+    stream()
+
+    # Run a batch of requests
+    print("\n========== batch ==========\n")
+    batch()
--- a/examples/quick_start/gemini_example_complete.py
+++ b/examples/quick_start/gemini_example_complete.py
@@ -1,7 +1,13 @@
-from sglang import function, gen, set_default_backend, VertexAI
+"""
+Usage:
+export GCP_PROJECT_ID=******
+python3 gemini_example_complete.py
+"""
+
+import sglang as sgl


-@function
+@sgl.function
 def few_shot_qa(s, question):
    s += (
 """The following are questions with answers.
@@ -13,14 +19,49 @@ Q: What is the capital of Italy?
 A: Rome
 """)
    s += "Q: " + question + "\n"
-    s += "A:" + gen("answer", stop="\n", temperature=0)
+    s += "A:" + sgl.gen("answer", stop="\n", temperature=0)


-set_default_backend(VertexAI("gemini-pro"))
+def single():
+    state = few_shot_qa.run(question="What is the capital of the United States?")
+    answer = state["answer"].strip().lower()

-state = few_shot_qa.run(question="What is the capital of the United States?")
-answer = state["answer"].strip().lower()
+    assert "washington" in answer, f"answer: {state['answer']}"

-assert "washington" in answer, f"answer: {state['answer']}"
+    print(state.text())

-print(state.text())
+
+def stream():
+    state = few_shot_qa.run(
+        question="What is the capital of the United States?",
+        stream=True)
+
+    for out in state.text_iter("answer"):
+        print(out, end="", flush=True)
+    print()
+
+
+def batch():
+    states = few_shot_qa.run_batch([
+        {"question": "What is the capital of the United States?"},
+        {"question": "What is the capital of China?"},
+    ])
+
+    for s in states:
+        print(s["answer"])
+
+
+if __name__ == "__main__":
+    sgl.set_default_backend(sgl.VertexAI("gemini-pro"))
+
+    # Run a single request
+    print("\n========== single ==========\n")
+    single()
+
+    # Stream output
+    print("\n========== stream ==========\n")
+    stream()
+
+    # Run a batch of requests
+    print("\n========== batch ==========\n")
+    batch()
--- a/examples/quick_start/gemini_example_multimodal_chat.py
+++ b/examples/quick_start/gemini_example_multimodal_chat.py
@@ -1,19 +1,29 @@
-from sglang import function, user, assistant, gen, image, set_default_backend, VertexAI
+"""
+Usage:
+export GCP_PROJECT_ID=******
+python3 gemini_example_multimodal_chat.py
+"""
+import sglang as sgl


-@function
+@sgl.function
 def image_qa(s, image_file1, image_file2, question):
-    s += user(image(image_file1) + image(image_file2) + question)
-    s += assistant(gen("answer_1", max_tokens=256))
+    s += sgl.user(sgl.image(image_file1) + sgl.image(image_file2) + question)
+    s += sgl.assistant(sgl.gen("answer", max_tokens=256))

-set_default_backend(VertexAI("gemini-pro-vision"))

-state = image_qa.run(
-    image_file1="./images/cat.jpeg",
-    image_file2="./images/dog.jpeg",
-    question="Describe difference of the 2 images in one sentence.",
-    stream=True
-)
+if __name__ == "__main__":
+    sgl.set_default_backend(sgl.VertexAI("gemini-pro-vision"))

-for out in state.text_iter():
-    print(out, end="", flush=True)
+    state = image_qa.run(
+        image_file1="./images/cat.jpeg",
+        image_file2="./images/dog.jpeg",
+        question="Describe difference of the two images in one sentence.",
+        stream=True
+    )
+
+    for out in state.text_iter("answer"):
+        print(out, end="", flush=True)
+    print()
+
+    print(state["answer"])
--- a/examples/quick_start/gemini_example_stream.py
+++ b/examples/quick_start/gemini_example_stream.py
@@ -1,20 +0,0 @@
-from sglang import function, user, assistant, gen, set_default_backend, VertexAI
-
-
-@function
-def multi_turn_question(s, question_1, question_2):
-    s += user(question_1)
-    s += assistant(gen("answer_1", max_tokens=256))
-    s += user(question_2)
-    s += assistant(gen("answer_2", max_tokens=256))
-
-set_default_backend(VertexAI("gemini-pro"))
-
-state = multi_turn_question.run(
-    question_1="What is the capital of the United States?",
-    question_2="List two local attractions.",
-    stream=True
-)
-
-for out in state.text_iter():
-    print(out, end="", flush=True)
--- a/examples/quick_start/openai_example_chat.py
+++ b/examples/quick_start/openai_example_chat.py
@@ -1,20 +1,68 @@
-from sglang import function, system, user, assistant, gen, set_default_backend, OpenAI
+"""
+Usage:
+export OPENAI_API_KEY=sk-******
+python3 openai_example_chat.py
+"""
+import sglang as sgl


-@function
+@sgl.function
 def multi_turn_question(s, question_1, question_2):
-    s += system("You are a helpful assistant.")
-    s += user(question_1)
-    s += assistant(gen("answer_1", max_tokens=256))
-    s += user(question_2)
-    s += assistant(gen("answer_2", max_tokens=256))
+    s += sgl.system("You are a helpful assistant.")
+    s += sgl.user(question_1)
+    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
+    s += sgl.user(question_2)
+    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))

-set_default_backend(OpenAI("gpt-3.5-turbo"))

-state = multi_turn_question.run(
-    question_1="What is the capital of the United States?",
-    question_2="List two local attractions.",
-)
+def single():
+    state = multi_turn_question.run(
+        question_1="What is the capital of the United States?",
+        question_2="List two local attractions.",
+    )

-for m in state.messages():
-    print(m["role"], ":", m["content"])
+    for m in state.messages():
+        print(m["role"], ":", m["content"])
+
+    print("answer_1", state["answer_1"])
+
+
+def stream():
+    state = multi_turn_question.run(
+        question_1="What is the capital of the United States?",
+        question_2="List two local attractions.",
+        stream=True
+    )
+
+    for out in state.text_iter():
+        print(out, end="", flush=True)
+    print()
+
+
+def batch():
+    states = multi_turn_question.run_batch([
+        {"question_1": "What is the capital of the United States?",
+         "question_2": "List two local attractions."},
+
+        {"question_1": "What is the capital of France?",
+         "question_2": "What is the population of this city?"},
+    ])
+
+    for s in states:
+        print(s.messages())
+
+
+if __name__ == "__main__":
+    sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo"))
+
+    # Run a single request
+    print("\n========== single ==========\n")
+    single()
+
+    # Stream output
+    print("\n========== stream ==========\n")
+    stream()
+
+    # Run a batch of requests
+    print("\n========== batch ==========\n")
+    batch()
--- a/examples/quick_start/openai_example_complete.py
+++ b/examples/quick_start/openai_example_complete.py
@@ -1,7 +1,13 @@
-from sglang import function, gen, set_default_backend, OpenAI
+"""
+Usage:
+export OPENAI_API_KEY=sk-******
+python3 openai_example_complete.py
+"""
+
+import sglang as sgl


-@function
+@sgl.function
 def few_shot_qa(s, question):
    s += (
 """The following are questions with answers.
@@ -13,14 +19,49 @@ Q: What is the capital of Italy?
 A: Rome
 """)
    s += "Q: " + question + "\n"
-    s += "A:" + gen("answer", stop="\n", temperature=0)
+    s += "A:" + sgl.gen("answer", stop="\n", temperature=0)


-set_default_backend(OpenAI("gpt-3.5-turbo-instruct"))
+def single():
+    state = few_shot_qa.run(question="What is the capital of the United States?")
+    answer = state["answer"].strip().lower()

-state = few_shot_qa.run(question="What is the capital of the United States?")
-answer = state["answer"].strip().lower()
+    assert "washington" in answer, f"answer: {state['answer']}"

-assert "washington" in answer, f"answer: {state['answer']}"
+    print(state.text())

-print(state.text())
+
+def stream():
+    state = few_shot_qa.run(
+        question="What is the capital of the United States?",
+        stream=True)
+
+    for out in state.text_iter("answer"):
+        print(out, end="", flush=True)
+    print()
+
+
+def batch():
+    states = few_shot_qa.run_batch([
+        {"question": "What is the capital of the United States?"},
+        {"question": "What is the capital of China?"},
+    ])
+
+    for s in states:
+        print(s["answer"])
+
+
+if __name__ == "__main__":
+    sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
+
+    # Run a single request
+    print("\n========== single ==========\n")
+    single()
+
+    # Stream output
+    print("\n========== stream ==========\n")
+    stream()
+
+    # Run a batch of requests
+    print("\n========== batch ==========\n")
+    batch()
--- a/examples/quick_start/openai_example_stream.py
+++ b/examples/quick_start/openai_example_stream.py
@@ -1,21 +0,0 @@
-from sglang import function, system, user, assistant, gen, set_default_backend, OpenAI
-
-
-@function
-def multi_turn_question(s, question_1, question_2):
-    s += system("You are a helpful assistant.")
-    s += user(question_1)
-    s += assistant(gen("answer_1", max_tokens=256))
-    s += user(question_2)
-    s += assistant(gen("answer_2", max_tokens=256))
-
-set_default_backend(OpenAI("gpt-3.5-turbo"))
-
-state = multi_turn_question.run(
-    question_1="What is the capital of the United States?",
-    question_2="List two local attractions.",
-    stream=True
-)
-
-for out in state.text_iter():
-    print(out, end="", flush=True)
--- a/examples/quick_start/srt_example_chat.py
+++ b/examples/quick_start/srt_example_chat.py
@@ -1,26 +1,69 @@
-from sglang import function, system, user, assistant, gen, set_default_backend, Runtime
+"""
+Usage:
+python3 srt_example_chat.py
+"""
+import sglang as sgl


-@function
+@sgl.function
 def multi_turn_question(s, question_1, question_2):
-    s += system("You are a helpful assistant.")
-    s += user(question_1)
-    s += assistant(gen("answer_1", max_tokens=256))
-    s += user(question_2)
-    s += assistant(gen("answer_2", max_tokens=256))
+    s += sgl.user(question_1)
+    s += sgl.assistant(sgl.gen("answer_1", max_tokens=256))
+    s += sgl.user(question_2)
+    s += sgl.assistant(sgl.gen("answer_2", max_tokens=256))


-runtime = Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
-#runtime = Runtime(model_path="mistralai/Mixtral-8x7B-Instruct-v0.1")
-set_default_backend(runtime)
+def single():
+    state = multi_turn_question.run(
+        question_1="What is the capital of the United States?",
+        question_2="List two local attractions.",
+    )

-state = multi_turn_question.run(
-    question_1="What is the capital of the United States?",
-    question_2="List two local attractions.",
-)
+    for m in state.messages():
+        print(m["role"], ":", m["content"])

-for m in state.messages():
-    print(m["role"], ":", m["content"])
+    print("answer_1", state["answer_1"])


-runtime.shutdown()
+def stream():
+    state = multi_turn_question.run(
+        question_1="What is the capital of the United States?",
+        question_2="List two local attractions.",
+        stream=True
+    )
+
+    for out in state.text_iter():
+        print(out, end="", flush=True)
+    print()
+
+
+def batch():
+    states = multi_turn_question.run_batch([
+        {"question_1": "What is the capital of the United States?",
+         "question_2": "List two local attractions."},
+
+        {"question_1": "What is the capital of France?",
+         "question_2": "What is the population of this city?"},
+    ])
+
+    for s in states:
+        print(s.messages())
+
+
+if __name__ == "__main__":
+    runtime = sgl.Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
+    sgl.set_default_backend(runtime)
+    try:
+        # Run a single request
+        print("\n========== single ==========\n")
+        single()
+
+        # Stream output
+        print("\n========== stream ==========\n")
+        stream()
+
+        # Run a batch of requests
+        print("\n========== batch ==========\n")
+        batch()
+    finally:  # shut the runtime down even if one of the demos above raised
+        runtime.shutdown()
--- a/examples/quick_start/srt_example_complete.py
+++ b/examples/quick_start/srt_example_complete.py
@@ -1,7 +1,11 @@
-from sglang import function, gen, set_default_backend, Runtime
+"""
+Usage:
+python3 srt_example_complete.py
+"""
+import sglang as sgl


-@function
+@sgl.function
 def few_shot_qa(s, question):
    s += (
 """The following are questions with answers.
@@ -13,16 +17,52 @@ Q: What is the capital of Italy?
 A: Rome
 """)
    s += "Q: " + question + "\n"
-    s += "A:" + gen("answer", stop="\n", temperature=0)
+    s += "A:" + sgl.gen("answer", stop="\n", temperature=0)


-runtime = Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
-set_default_backend(runtime)
+def single():
+    state = few_shot_qa.run(question="What is the capital of the United States?")
+    answer = state["answer"].strip().lower()

-state = few_shot_qa.run(question="What is the capital of the United States?")
+    assert "washington" in answer, f"answer: {state['answer']}"

-answer = state["answer"].strip().lower()
-assert "washington" in answer, f"answer: {state['answer']}"
-print(state.text())
+    print(state.text())

-runtime.shutdown()
+
+def stream():
+    state = few_shot_qa.run(
+        question="What is the capital of the United States?",
+        stream=True)
+
+    for out in state.text_iter("answer"):
+        print(out, end="", flush=True)
+    print()
+
+
+def batch():
+    states = few_shot_qa.run_batch([
+        {"question": "What is the capital of the United States?"},
+        {"question": "What is the capital of China?"},
+    ])
+
+    for s in states:
+        print(s["answer"])
+
+
+if __name__ == "__main__":
+    runtime = sgl.Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
+    sgl.set_default_backend(runtime)
+    try:
+        # Run a single request (asserts on the model's answer, so it may raise)
+        print("\n========== single ==========\n")
+        single()
+
+        # Stream output
+        print("\n========== stream ==========\n")
+        stream()
+
+        # Run a batch of requests
+        print("\n========== batch ==========\n")
+        batch()
+    finally:  # shut the runtime down even if one of the demos above raised
+        runtime.shutdown()
--- a/examples/quick_start/srt_example_llava.py
+++ b/examples/quick_start/srt_example_llava.py
@@ -10,29 +10,53 @@ def image_qa(s, image_path, question):
    s += sgl.assistant(sgl.gen("answer"))


-runtime = sgl.Runtime(model_path="liuhaotian/llava-v1.5-7b",
-                      tokenizer_path="llava-hf/llava-1.5-7b-hf")
-sgl.set_default_backend(runtime)
+def single():
+    state = image_qa.run(
+        image_path="images/cat.jpeg",
+        question="What is this?",
+        max_new_tokens=64)
+    print(state["answer"], "\n")


-# Single
-state = image_qa.run(
-    image_path="images/cat.jpeg",
-    question="What is this?",
-    max_new_tokens=64)
-print(state["answer"], "\n")
+def stream():
+    state = image_qa.run(
+        image_path="images/cat.jpeg",
+        question="What is this?",
+        max_new_tokens=64,
+        stream=True)
+
+    for out in state.text_iter("answer"):
+        print(out, end="", flush=True)
+    print()


-# Batch
-states = image_qa.run_batch(
-    [
-        {"image_path": "images/cat.jpeg", "question":"What is this?"},
-        {"image_path": "images/dog.jpeg", "question":"What is this?"},
-    ],
-    max_new_tokens=64,
-)
-for s in states:
-    print(s["answer"], "\n")
+def batch():
+    states = image_qa.run_batch(
+        [
+            {"image_path": "images/cat.jpeg", "question":"What is this?"},
+            {"image_path": "images/dog.jpeg", "question":"What is this?"},
+        ],
+        max_new_tokens=64,
+    )
+    for s in states:
+        print(s["answer"], "\n")


-runtime.shutdown()
+if __name__ == "__main__":
+    runtime = sgl.Runtime(model_path="liuhaotian/llava-v1.5-7b",
+                          tokenizer_path="llava-hf/llava-1.5-7b-hf")
+    sgl.set_default_backend(runtime)
+    try:
+        # Run a single request
+        print("\n========== single ==========\n")
+        single()
+
+        # Stream output
+        print("\n========== stream ==========\n")
+        stream()
+
+        # Run a batch of requests
+        print("\n========== batch ==========\n")
+        batch()
+    finally:  # shut the runtime down even if one of the demos above raised
+        runtime.shutdown()
--- a/examples/quick_start/srt_example_regex.py
+++ b/examples/quick_start/srt_example_regex.py
@@ -1,24 +0,0 @@
-from sglang import function, gen, set_default_backend, Runtime
-
-
-IP_ADDR_REGEX = r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
-
-
-@function
-def regex_gen(s):
-    s += "Q: What is the IP address of the Google DNS servers?\n"
-    s += "A: " + gen(
-        "answer",
-        temperature=0,
-        regex=IP_ADDR_REGEX,
-    )
-
-
-runtime = Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
-set_default_backend(runtime)
-
-state = regex_gen.run()
-
-print(state.text())
-
-runtime.shutdown()
--- a/examples/quick_start/srt_example_stream.py
+++ b/examples/quick_start/srt_example_stream.py
@@ -1,26 +0,0 @@
-from sglang import function, system, user, assistant, gen, set_default_backend, Runtime
-
-
-@function
-def multi_turn_question(s, question_1, question_2):
-    s += system("You are a helpful assistant.")
-    s += user(question_1)
-    s += assistant(gen("answer_1", max_tokens=256))
-    s += user(question_2)
-    s += assistant(gen("answer_2", max_tokens=256))
-
-runtime = Runtime("meta-llama/Llama-2-7b-chat-hf")
-set_default_backend(runtime)
-
-state = multi_turn_question.run(
-    question_1="What is the capital of the United States?",
-    question_2="List two local attractions.",
-    temperature=0,
-    stream=True,
-)
-
-for out in state.text_iter():
-    print(out, end="", flush=True)
-print()
-
-runtime.shutdown()