release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com>
Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com>
Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com>
Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>
This commit is contained in:
Lianmin Zheng
2024-01-08 04:37:50 +00:00
parent f6d40df0ee
commit 22085081bb
145 changed files with 17802 additions and 2 deletions

60
test/lang/run_all.py Normal file
View File

@@ -0,0 +1,60 @@
import argparse
import glob
import multiprocessing
import os
import time
import unittest
from sglang.utils import run_with_timeout
def run_unittest_files(files, args):
for filename in files:
def func():
print(filename)
ret = unittest.main(module=None, argv=["", "-vb"] + [filename])
p = multiprocessing.Process(target=func)
def run_one_file():
p.start()
p.join()
try:
run_with_timeout(run_one_file, timeout=args.time_limit_per_file)
if p.exitcode != 0:
return False
except TimeoutError:
p.terminate()
time.sleep(5)
print(
f"\nTimeout after {args.time_limit_per_file} seconds "
f"when running {filename}"
)
return False
return True
if __name__ == "__main__":
    # Command-line entry point: discover test files below the current
    # directory, run each one in its own process, and report the outcome
    # through the process exit status.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--time-limit-per-file",
        type=int,
        default=1000,
        help="The time limit for running one file in seconds.",
    )
    args = arg_parser.parse_args()

    # glob returns matches in filesystem-dependent order; sort them so the
    # run order is reproducible between machines.
    files = sorted(glob.glob("**/test_*.py", recursive=True))

    tic = time.time()
    success = run_unittest_files(files, args)
    elapsed = time.time() - tic

    if success:
        print(f"Success. Time elapsed: {elapsed:.2f}s")
    else:
        print(f"Fail. Time elapsed: {elapsed:.2f}s")

    # Raise SystemExit directly instead of calling exit(): the latter is a
    # helper injected by the site module for interactive use and is not
    # available when the interpreter is started with -S.
    raise SystemExit(0 if success else -1)

View File

@@ -0,0 +1,35 @@
import json
import unittest
from sglang.test.test_programs import test_mt_bench, test_stream
from sglang import Anthropic, set_default_backend
class TestAnthropicBackend(unittest.TestCase):
    """Run shared sglang test programs with an ``Anthropic("claude-2")`` backend."""

    # Cached on the class so the backend is built once for all tests.
    backend = None
    chat_backend = None

    def setUp(self):
        """Build the backend on first use and register it as the default."""
        owner = type(self)
        if owner.backend is not None:
            return

        owner.backend = Anthropic("claude-2")
        set_default_backend(owner.backend)

    def test_mt_bench(self):
        """Run the shared ``test_mt_bench`` program."""
        test_mt_bench()

    def test_stream(self):
        """Run the shared ``test_stream`` program."""
        test_stream()
if __name__ == "__main__":
    # Run every test in this file when it is executed as a script;
    # unittest.main is told to ignore warnings.
    unittest.main(warnings="ignore")

    # Disabled snippet for running one test by hand with verbosity set to 2:
    # from sglang.global_config import global_config
    # global_config.verbosity = 2
    # t = TestAnthropicBackend()
    # t.setUp()
    # t.test_mt_bench()

View File

@@ -0,0 +1,54 @@
import unittest
from sglang.backend.runtime_endpoint import RuntimeEndpoint
import sglang as sgl
class TestBind(unittest.TestCase):
    """Exercise ``bind`` together with ``trace``, ``pin`` and ``unpin``."""

    # Cached on the class so the endpoint object is created only once.
    backend = None

    def setUp(self):
        """Create the ``RuntimeEndpoint`` for localhost:30000 on first use."""
        owner = type(self)
        if owner.backend is not None:
            return
        owner.backend = RuntimeEndpoint(base_url="http://localhost:30000")

    @staticmethod
    def _build_few_shot_qa():
        """Return a freshly decorated few-shot QA program used by both tests."""

        @sgl.function
        def few_shot_qa(s, prompt, question):
            s += prompt
            s += "Q: What is the capital of France?\n"
            s += "A: Paris\n"
            s += "Q: " + question + "\n"
            s += "A:" + sgl.gen("answer", stop="\n")

        return few_shot_qa

    def test_bind(self):
        """Bind the prompt, trace the bound program and print its graph."""
        bound = self._build_few_shot_qa().bind(
            prompt="The following are questions with answers.\n\n"
        )

        graph_text = bound.trace().last_node.print_graph_dfs()
        print(graph_text + "\n")

    def test_pin(self):
        """Bind the prompt, then pin and unpin the program on the backend."""
        bound = self._build_few_shot_qa().bind(
            prompt="Answer the following questions as if you were a 5-year-old kid.\n\n"
        )

        endpoint = self.backend
        bound.pin(endpoint)
        bound.unpin(endpoint)
if __name__ == "__main__":
    # Run every test in this file when it is executed as a script;
    # unittest.main is told to ignore warnings.
    unittest.main(warnings="ignore")

    # Disabled snippet for running only test_pin by hand:
    # t = TestBind()
    # t.setUp()
    # t.test_pin()

View File

@@ -0,0 +1,91 @@
import unittest
from sglang.test.test_programs import (
test_decode_int,
test_decode_json,
test_expert_answer,
test_few_shot_qa,
test_image_qa,
test_mt_bench,
test_parallel_decoding,
test_parallel_encoding,
test_react,
test_select,
test_stream,
test_tool_use,
)
from sglang import OpenAI, set_default_backend
class TestOpenAIBackend(unittest.TestCase):
    """Run the shared sglang test programs against ``OpenAI`` backends."""

    # Cached on the class so the three backends are built only once.
    backend = None
    chat_backend = None
    chat_vision_backend = None

    def setUp(self):
        """Create the three ``OpenAI`` backends the first time a test runs."""
        owner = type(self)
        if owner.backend is not None:
            return

        owner.backend = OpenAI("gpt-3.5-turbo-instruct")
        owner.chat_backend = OpenAI("gpt-3.5-turbo")
        owner.chat_vision_backend = OpenAI("gpt-4-vision-preview")

    def _run_on(self, backend, program, **kwargs):
        """Make *backend* the default, then call *program* with *kwargs*."""
        set_default_backend(backend)
        program(**kwargs)

    def test_few_shot_qa(self):
        self._run_on(self.backend, test_few_shot_qa)

    def test_mt_bench(self):
        self._run_on(self.chat_backend, test_mt_bench)

    def test_select(self):
        self._run_on(self.backend, test_select, check_answer=True)

    def test_decode_int(self):
        self._run_on(self.backend, test_decode_int)

    def test_decode_json(self):
        self._run_on(self.backend, test_decode_json)

    def test_expert_answer(self):
        self._run_on(self.backend, test_expert_answer)

    def test_tool_use(self):
        self._run_on(self.backend, test_tool_use)

    def test_react(self):
        self._run_on(self.backend, test_react)

    def test_parallel_decoding(self):
        self._run_on(self.backend, test_parallel_decoding)

    def test_parallel_encoding(self):
        self._run_on(self.backend, test_parallel_encoding)

    def test_image_qa(self):
        self._run_on(self.chat_vision_backend, test_image_qa)

    def test_stream(self):
        self._run_on(self.backend, test_stream)
if __name__ == "__main__":
    # Run every test in this file when it is executed as a script;
    # unittest.main is told to ignore warnings.
    unittest.main(warnings="ignore")

    # Disabled snippet for running one test by hand with verbosity set to 2:
    # from sglang.global_config import global_config
    # global_config.verbosity = 2
    # t = TestOpenAIBackend()
    # t.setUp()
    # t.test_decode_json()

View File

@@ -0,0 +1,74 @@
"""
python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
"""
import json
import unittest
from sglang.test.test_programs import (
test_decode_int,
test_decode_json,
test_expert_answer,
test_few_shot_qa,
test_mt_bench,
test_parallel_decoding,
test_parallel_encoding,
test_react,
test_regex,
test_select,
test_stream,
test_tool_use,
)
import sglang as sgl
class TestSRTBackend(unittest.TestCase):
    """Run the shared sglang test programs via a ``RuntimeEndpoint``.

    The endpoint is created for ``http://localhost:30000``.
    """

    # Cached on the class so the endpoint object is created only once.
    backend = None

    def setUp(self):
        """Create the endpoint on first use and register it as the default."""
        owner = type(self)
        if owner.backend is not None:
            return

        owner.backend = sgl.RuntimeEndpoint(base_url="http://localhost:30000")
        sgl.set_default_backend(owner.backend)

    def test_few_shot_qa(self):
        """Run the shared ``test_few_shot_qa`` program."""
        test_few_shot_qa()

    def test_mt_bench(self):
        """Run the shared ``test_mt_bench`` program."""
        test_mt_bench()

    def test_select(self):
        """Run the shared ``test_select`` program with ``check_answer=False``."""
        test_select(check_answer=False)

    def test_decode_int(self):
        """Run the shared ``test_decode_int`` program."""
        test_decode_int()

    def test_expert_answer(self):
        """Run the shared ``test_expert_answer`` program."""
        test_expert_answer()

    def test_tool_use(self):
        """Run the shared ``test_tool_use`` program."""
        test_tool_use()

    def test_parallel_decoding(self):
        """Run the shared ``test_parallel_decoding`` program."""
        test_parallel_decoding()

    def test_stream(self):
        """Run the shared ``test_stream`` program."""
        test_stream()

    def test_regex(self):
        """Run the shared ``test_regex`` program."""
        test_regex()

    # Disabled test, kept as in the original file:
    # def test_parallel_encoding(self):
    #     test_parallel_encoding(check_answer=False)
if __name__ == "__main__":
    # Run every test in this file when it is executed as a script;
    # unittest.main is told to ignore warnings.
    unittest.main(warnings="ignore")

    # Disabled snippet for running one test by hand with verbosity set to 2:
    # from sglang.global_config import global_config
    # global_config.verbosity = 2
    # t = TestSRTBackend()
    # t.setUp()
    # t.test_regex()

132
test/lang/test_tracing.py Normal file
View File

@@ -0,0 +1,132 @@
import unittest
from sglang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template
import sglang as sgl
class TestTracing(unittest.TestCase):
    """Tests for tracing and compiling ``sgl.function`` programs."""

    def test_few_shot_qa(self):
        """Trace a few-shot QA program and print the traced graph."""

        @sgl.function
        def few_shot_qa(s, question):
            s += "The following are questions with answers.\n\n"
            s += "Q: What is the capital of France?\n"
            s += "A: Paris\n"
            s += "Q: " + question + "\n"
            s += "A:" + sgl.gen("answer", stop="\n")

        tracer = few_shot_qa.trace()
        print(tracer.last_node.print_graph_dfs() + "\n")

    def test_select(self):
        """Trace a program that uses ``sgl.select`` and print the graph."""

        @sgl.function
        def capital(s):
            s += "The capital of France is"
            s += sgl.select("capital", ["Paris. ", "London. "])
            s += "It is a city" + sgl.gen("description", stop=".")

        tracer = capital.trace()
        print(tracer.last_node.print_graph_dfs() + "\n")

    def test_raise_warning(self):
        """Tracing ``wrong`` is expected to raise ``TypeError``."""

        @sgl.function
        def wrong(s, question):
            s += f"I want to ask {question}"

        # assertRaises keeps checking under ``python -O``, where a bare
        # ``assert`` would be stripped and the test would silently pass.
        with self.assertRaises(TypeError):
            wrong.trace()

    def test_multi_function(self):
        """Compile a program that calls another program, then run it.

        The compiled program is run once and then as a batch of three
        topics with ``sgl.OpenAI("gpt-3.5-turbo-instruct")`` as the default
        backend.
        """

        @sgl.function
        def expand(s, tip):
            s += (
                "Please expand the following tip into a detailed paragraph:"
                + tip
                + "\n"
            )
            s += sgl.gen("detailed_tip")

        @sgl.function
        def tip_suggestion(s, topic):
            s += "Here are 2 tips for " + topic + ".\n"

            s += "1." + sgl.gen("tip_1", stop=["\n", ":", "."]) + "\n"
            s += "2." + sgl.gen("tip_2", stop=["\n", ":", "."]) + "\n"

            branch1 = expand(tip=s["tip_1"])
            branch2 = expand(tip=s["tip_2"])

            s += "Tip 1: " + branch1["detailed_tip"] + "\n"
            s += "Tip 2: " + branch2["detailed_tip"] + "\n"
            s += "In summary" + sgl.gen("summary")

        compiled = tip_suggestion.compile()
        compiled.print_graph()

        sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
        state = compiled.run(topic="staying healthy")
        print(state.text() + "\n")

        states = compiled.run_batch(
            [
                {"topic": "staying healthy"},
                {"topic": "staying happy"},
                {"topic": "earning money"},
            ],
            temperature=0,
        )
        for s in states:
            print(s.text() + "\n")

    def test_role(self):
        """Compile a multi-turn chat program with the llama-2-chat template."""

        @sgl.function
        def multi_turn_chat(s):
            s += sgl.user("Who are you?")
            s += sgl.assistant(sgl.gen("answer_1"))
            s += sgl.user("Who created you?")
            s += sgl.assistant(sgl.gen("answer_2"))

        backend = BaseBackend()
        backend.chat_template = get_chat_template("llama-2-chat")

        compiled = multi_turn_chat.compile(backend=backend)
        compiled.print_graph()

    def test_fork(self):
        """Trace a program that forks into three branches, then run it.

        The run uses ``sgl.OpenAI("gpt-3.5-turbo-instruct")`` as backend.
        """

        @sgl.function
        def tip_suggestion(s):
            s += (
                "Here are three tips for staying healthy: "
                "1. Balanced Diet; "
                "2. Regular Exercise; "
                "3. Adequate Sleep\n"
            )

            forks = s.fork(3)
            for i in range(3):
                forks[i] += f"Now, expand tip {i+1} into a paragraph:\n"
                forks[i] += sgl.gen("detailed_tip")

            s += "Tip 1:" + forks[0]["detailed_tip"] + "\n"
            s += "Tip 2:" + forks[1]["detailed_tip"] + "\n"
            s += "Tip 3:" + forks[2]["detailed_tip"] + "\n"
            s += "In summary" + sgl.gen("summary")

        tracer = tip_suggestion.trace()
        print(tracer.last_node.print_graph_dfs())

        a = tip_suggestion.run(backend=sgl.OpenAI("gpt-3.5-turbo-instruct"))
        print(a.text())
if __name__ == "__main__":
    # Run every test in this file when it is executed as a script;
    # unittest.main is told to ignore warnings.
    unittest.main(warnings="ignore")

    # Disabled snippet for running only test_fork by hand:
    # t = TestTracing()
    # t.test_fork()