Improve docs & Add JSON decode example (#121)

This commit is contained in:
Lianmin Zheng
2024-01-30 05:45:27 -08:00
committed by GitHub
parent 0617528632
commit 97aa9b3284
19 changed files with 212 additions and 61 deletions

View File

@@ -1,3 +1,7 @@
"""
Usage:
python3 async_io.py
"""
import asyncio
from sglang import Runtime
@@ -27,8 +31,8 @@ async def generate(
if __name__ == "__main__":
runtime = Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
print("runtime ready")
print("--- runtime ready ---\n")
prompt = "Who is Alan Turing?"
sampling_params = {"max_new_tokens": 128}
asyncio.run(generate(runtime, prompt, sampling_params))

View File

@@ -1,7 +1,8 @@
"""
Usage:
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
python choices_logprob.py
"""
import sglang as sgl

View File

@@ -0,0 +1,81 @@
"""
Usage:
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
python json_decode.py
"""
from enum import Enum
from pydantic import BaseModel, constr
import sglang as sgl
from sglang.srt.constrained.json_schema import build_regex_from_object
# Regex that constrains generation to a JSON character sheet for a Harry
# Potter character.  Adjacent raw-string literals are concatenated
# implicitly; the resulting pattern is identical to the original.
character_regex = (
    r"""\{\n"""
    r""" "name": "[\w\d\s]{1,16}",\n"""
    r""" "house": "(Gryffindor|Slytherin|Ravenclaw|Hufflepuff)",\n"""
    r""" "blood status": "(Pure-blood|Half-blood|Muggle-born)",\n"""
    r""" "occupation": "(student|teacher|auror|ministry of magic|death eater|order of the phoenix)",\n"""
    r""" "wand": \{\n"""
    r""" "wood": "[\w\d\s]{1,16}",\n"""
    r""" "core": "[\w\d\s]{1,16}",\n"""
    r""" "length": [0-9]{1,2}\.[0-9]{0,2}\n"""
    r""" \},\n"""
    r""" "alive": "(Alive|Deceased)",\n"""
    r""" "patronus": "[\w\d\s]{1,16}",\n"""
    r""" "bogart": "[\w\d\s]{1,16}"\n"""
    r"""\}"""
)
@sgl.function
def character_gen(s, name):
    """Prompt for a Harry Potter character sheet, constrained by character_regex.

    The generated text is stored in the state under the key "json_output".
    """
    s += name + " is a character in Harry Potter. Please fill in the following information about this character.\n"
    # regex= forces the decoder to emit text matching character_regex,
    # so the output is guaranteed to have the expected JSON shape.
    s += sgl.gen("json_output", max_tokens=256, regex=character_regex)
def driver_character_gen():
    """Generate the Hermione Granger character sheet and print the transcript."""
    print(character_gen.run(name="Hermione Granger").text())
# Closed set of weapon choices, built with the functional Enum API.
# type=str mixes in str exactly like `class Weapon(str, Enum)`, so the
# members are JSON-serializable strings with identical names and values.
Weapon = Enum(
    "Weapon",
    {
        "sword": "sword",
        "axe": "axe",
        "mace": "mace",
        "spear": "spear",
        "bow": "bow",
        "crossbow": "crossbow",
    },
    type=str,
)
class Wizard(BaseModel):
    """Pydantic schema describing a wizard; used to derive a generation regex."""

    name: str
    age: int
    weapon: Weapon  # restricted to the Weapon enum members
@sgl.function
def pydantic_wizard_gen(s):
    """Generate a wizard description constrained to the Wizard JSON schema."""
    s += "Give me a description about a wizard in the JSON format.\n"
    s += sgl.gen(
        "character",
        max_tokens=128,
        temperature=0,  # greedy decoding for a reproducible answer
        # The regex is derived from the pydantic model's JSON schema.
        regex=build_regex_from_object(Wizard),  # Requires pydantic >= 2.0
    )
# NOTE(review): this is an exact duplicate of driver_character_gen defined
# earlier in this file — the second definition silently shadows the first.
# One copy should be removed (they are identical, so behavior is unchanged
# for now).
def driver_character_gen():
    state = character_gen.run(name="Hermione Granger")
    print(state.text())
def driver_pydantic_wizard_gen():
    """Run the pydantic-schema-constrained generation and print the result."""
    print(pydantic_wizard_gen.run().text())
if __name__ == "__main__":
    # Point sglang at the locally running server (see the usage note in the
    # module docstring for how to launch it).
    sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))
    driver_character_gen()
    # driver_pydantic_wizard_gen()  # disabled by default; requires pydantic >= 2.0

View File

@@ -1,3 +1,7 @@
"""
Usage:
python3 openai_speculative.py
"""
from sglang import function, gen, set_default_backend, OpenAI

View File

@@ -1,3 +1,7 @@
"""
Usage:
python3 parallel_sample.py
"""
import sglang as sgl

View File

@@ -1,14 +1,20 @@
"""
Usage:
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
python readme_examples.py
"""
import sglang as sgl
@sgl.function
def tool_use(s, question):
s += "To answer this question: " + question + ", "
s += "I need to use a " + sgl.gen("tool", choices=["calculator", "web browser"]) + ". "
s += "To answer this question: " + question + ". "
s += "I need to use a " + sgl.gen("tool", choices=["calculator", "search engine"]) + ". "
if s["tool"] == "calculator":
s += "The math expression is" + sgl.gen("expression")
elif s["tool"] == "web browser":
s += "The website url is" + sgl.gen("url")
elif s["tool"] == "search engine":
s += "The key word to search is" + sgl.gen("word")
@sgl.function
@@ -28,6 +34,16 @@ def tip_suggestion(s):
s += "In summary" + sgl.gen("summary")
@sgl.function
def regular_expression_gen(s):
    """Answer a question with output constrained to a dotted-quad IPv4 address.

    Each octet matches 0-255.  The separator dot is escaped (`\\.`): the
    original pattern used a bare `.`, which matches ANY character and would
    let the model emit e.g. "8a8b8c8" instead of "8.8.8.8".
    """
    s += "Q: What is the IP address of the Google DNS servers?\n"
    s += "A: " + sgl.gen(
        "answer",
        temperature=0,  # greedy decoding for a reproducible answer
        regex=r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)",
    )
@sgl.function
def text_qa(s, question):
s += "Q: " + question + "\n"
@@ -46,6 +62,12 @@ def driver_tip_suggestion():
print("\n")
def driver_regex():
    """Run the regex-constrained QA example and print its transcript."""
    answer_state = regular_expression_gen.run()
    # end="\n\n\n" reproduces print(text) followed by print("\n") exactly.
    print(answer_state.text(), end="\n\n\n")
def driver_batching():
states = text_qa.run_batch(
[
@@ -74,9 +96,11 @@ def driver_stream():
if __name__ == "__main__":
sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
#sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))
driver_tool_use()
driver_tip_suggestion()
driver_regex()
driver_batching()
driver_stream()

View File

@@ -1,24 +0,0 @@
from sglang import function, gen, set_default_backend, Runtime
# Dotted-quad IPv4 address: four octets in 0-255 separated by literal dots.
IP_ADDR_REGEX = r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)"
@function
def regex_gen(s):
    """Ask for the Google DNS IP, constraining the answer with IP_ADDR_REGEX."""
    s += "Q: What is the IP address of the Google DNS servers?\n"
    s += "A: " + gen(
        "answer",
        temperature=0,  # greedy decoding for a reproducible answer
        regex=IP_ADDR_REGEX,
    )
# Spin up an in-process runtime, run the regex-constrained generation once,
# print the transcript, and shut the runtime down.
runtime = Runtime(model_path="meta-llama/Llama-2-7b-chat-hf")
set_default_backend(runtime)
state = regex_gen.run()
print(state.text())
runtime.shutdown()

View File

@@ -1,3 +1,7 @@
"""
Usage:
python3 streaming.py
"""
import asyncio
import sglang as sgl