release initial code
Co-authored-by: Ying Sheng <sqy1415@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu> Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com> Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com> Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>
This commit is contained in:
60
test/lang/run_all.py
Normal file
60
test/lang/run_all.py
Normal file
@@ -0,0 +1,60 @@
|
||||
import argparse
|
||||
import glob
|
||||
import multiprocessing
|
||||
import os
|
||||
import time
|
||||
import unittest
|
||||
|
||||
from sglang.utils import run_with_timeout
|
||||
|
||||
|
||||
def run_unittest_files(files, args):
    """Run each unittest file in its own process, stopping at the first failure.

    Args:
        files: Iterable of test file names; each one is passed to
            ``unittest.main`` as a command-line argument.
        args: Parsed command-line namespace. Only ``time_limit_per_file``
            (seconds allowed for one file) is read.

    Returns:
        ``True`` when every file exits with code 0 within its time limit
        (including when ``files`` is empty); ``False`` as soon as one file
        exits non-zero or times out. Remaining files are not run.
    """
    for filename in files:

        def func():
            print(filename)
            # unittest.main() calls sys.exit() by default, so there is no
            # return value worth keeping; the result travels via the
            # child's exit code instead.
            unittest.main(module=None, argv=["", "-vb"] + [filename])

        p = multiprocessing.Process(target=func)

        def run_one_file():
            p.start()
            p.join()

        # Only the timed call can raise TimeoutError, so it is the only
        # statement kept inside the try body.
        try:
            run_with_timeout(run_one_file, timeout=args.time_limit_per_file)
        except TimeoutError:
            p.terminate()
            # Pause after terminating the child before reporting.
            time.sleep(5)
            print(
                f"\nTimeout after {args.time_limit_per_file} seconds "
                f"when running {filename}"
            )
            return False

        if p.exitcode != 0:
            return False

    return True
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Local import: this entry point is the only code here that needs sys.
    import sys

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--time-limit-per-file",
        type=int,
        default=1000,
        help="The time limit for running one file in seconds.",
    )
    args = arg_parser.parse_args()

    # glob returns matches in a filesystem-dependent order; sorting makes the
    # run order (and therefore which failure is reported first) reproducible.
    files = sorted(glob.glob("**/test_*.py", recursive=True))

    tic = time.time()
    success = run_unittest_files(files, args)

    if success:
        print(f"Success. Time elapsed: {time.time() - tic:.2f}s")
    else:
        print(f"Fail. Time elapsed: {time.time() - tic:.2f}s")

    # sys.exit is always available, whereas the bare exit() helper is only
    # injected by the site module. The exit status values are unchanged.
    sys.exit(0 if success else -1)
|
||||
35
test/lang/test_anthropic_backend.py
Normal file
35
test/lang/test_anthropic_backend.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import json
|
||||
import unittest
|
||||
|
||||
from sglang.test.test_programs import test_mt_bench, test_stream
|
||||
|
||||
from sglang import Anthropic, set_default_backend
|
||||
|
||||
|
||||
class TestAnthropicBackend(unittest.TestCase):
    """Runs shared sglang test programs with an Anthropic backend as default."""

    # Stored on the class so every test method reuses the same backend.
    backend = None
    # Declared here but never assigned within this class.
    chat_backend = None

    def setUp(self):
        klass = type(self)
        # Already initialised by an earlier test method: nothing to do.
        if klass.backend is not None:
            return

        klass.backend = Anthropic("claude-2")
        set_default_backend(klass.backend)

    def test_mt_bench(self):
        # Resolves to the imported module-level program, not this method.
        test_mt_bench()

    def test_stream(self):
        test_stream()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every test case in this module with warnings silenced.
    unittest.main(warnings="ignore")

    # Disabled snippet for driving a single test by hand:
    # from sglang.global_config import global_config
    #
    # global_config.verbosity = 2
    # t = TestAnthropicBackend()
    # t.setUp()
    # t.test_mt_bench()
|
||||
54
test/lang/test_bind_pin.py
Normal file
54
test/lang/test_bind_pin.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import unittest
|
||||
|
||||
from sglang.backend.runtime_endpoint import RuntimeEndpoint
|
||||
|
||||
import sglang as sgl
|
||||
|
||||
|
||||
class TestBind(unittest.TestCase):
    """Checks ``bind`` together with ``trace`` and with ``pin``/``unpin``."""

    # Stored on the class so the endpoint object is created only once.
    backend = None

    def setUp(self):
        klass = type(self)
        if klass.backend is not None:
            return

        klass.backend = RuntimeEndpoint(base_url="http://localhost:30000")

    @staticmethod
    def _make_few_shot_qa():
        # Builds a fresh copy of the few-shot QA program both tests start from.
        @sgl.function
        def few_shot_qa(s, prompt, question):
            s += prompt
            s += "Q: What is the capital of France?\n"
            s += "A: Paris\n"
            s += "Q: " + question + "\n"
            s += "A:" + sgl.gen("answer", stop="\n")

        return few_shot_qa

    def test_bind(self):
        # Bind only the prompt, then trace the partially-bound program.
        bound_qa = self._make_few_shot_qa().bind(
            prompt="The following are questions with answers.\n\n"
        )

        traced = bound_qa.trace()
        print(traced.last_node.print_graph_dfs() + "\n")

    def test_pin(self):
        # Bind the prompt, then pin and unpin it on the runtime endpoint.
        bound_qa = self._make_few_shot_qa().bind(
            prompt="Answer the following questions as if you were a 5-year-old kid.\n\n"
        )
        endpoint = self.backend
        bound_qa.pin(endpoint)
        bound_qa.unpin(endpoint)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every test case in this module with warnings silenced.
    unittest.main(warnings="ignore")

    # Disabled snippet for driving a single test by hand:
    # t = TestBind()
    # t.setUp()
    # t.test_pin()
|
||||
91
test/lang/test_openai_backend.py
Normal file
91
test/lang/test_openai_backend.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import unittest
|
||||
|
||||
from sglang.test.test_programs import (
|
||||
test_decode_int,
|
||||
test_decode_json,
|
||||
test_expert_answer,
|
||||
test_few_shot_qa,
|
||||
test_image_qa,
|
||||
test_mt_bench,
|
||||
test_parallel_decoding,
|
||||
test_parallel_encoding,
|
||||
test_react,
|
||||
test_select,
|
||||
test_stream,
|
||||
test_tool_use,
|
||||
)
|
||||
|
||||
from sglang import OpenAI, set_default_backend
|
||||
|
||||
|
||||
class TestOpenAIBackend(unittest.TestCase):
    """Runs the shared sglang test programs against three OpenAI backends."""

    # Stored on the class so one set of backends serves every test method.
    backend = None
    chat_backend = None
    chat_vision_backend = None

    def setUp(self):
        klass = type(self)
        # Backends already built by an earlier test method.
        if klass.backend is not None:
            return

        klass.backend = OpenAI("gpt-3.5-turbo-instruct")
        klass.chat_backend = OpenAI("gpt-3.5-turbo")
        klass.chat_vision_backend = OpenAI("gpt-4-vision-preview")

    @staticmethod
    def _run_on(target_backend, program, **program_kwargs):
        # Make the given backend the sglang default, then run the program.
        set_default_backend(target_backend)
        program(**program_kwargs)

    # Inside the methods below, names such as ``test_select`` resolve to the
    # imported module-level programs, not to the methods themselves.

    def test_few_shot_qa(self):
        self._run_on(self.backend, test_few_shot_qa)

    def test_mt_bench(self):
        self._run_on(self.chat_backend, test_mt_bench)

    def test_select(self):
        self._run_on(self.backend, test_select, check_answer=True)

    def test_decode_int(self):
        self._run_on(self.backend, test_decode_int)

    def test_decode_json(self):
        self._run_on(self.backend, test_decode_json)

    def test_expert_answer(self):
        self._run_on(self.backend, test_expert_answer)

    def test_tool_use(self):
        self._run_on(self.backend, test_tool_use)

    def test_react(self):
        self._run_on(self.backend, test_react)

    def test_parallel_decoding(self):
        self._run_on(self.backend, test_parallel_decoding)

    def test_parallel_encoding(self):
        self._run_on(self.backend, test_parallel_encoding)

    def test_image_qa(self):
        self._run_on(self.chat_vision_backend, test_image_qa)

    def test_stream(self):
        self._run_on(self.backend, test_stream)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every test case in this module with warnings silenced.
    unittest.main(warnings="ignore")

    # Disabled snippet for driving a single test by hand:
    # from sglang.global_config import global_config
    #
    # global_config.verbosity = 2
    # t = TestOpenAIBackend()
    # t.setUp()
    # t.test_decode_json()
|
||||
74
test/lang/test_srt_backend.py
Normal file
74
test/lang/test_srt_backend.py
Normal file
@@ -0,0 +1,74 @@
|
||||
"""
|
||||
python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
|
||||
"""
|
||||
import json
|
||||
import unittest
|
||||
|
||||
from sglang.test.test_programs import (
|
||||
test_decode_int,
|
||||
test_decode_json,
|
||||
test_expert_answer,
|
||||
test_few_shot_qa,
|
||||
test_mt_bench,
|
||||
test_parallel_decoding,
|
||||
test_parallel_encoding,
|
||||
test_react,
|
||||
test_regex,
|
||||
test_select,
|
||||
test_stream,
|
||||
test_tool_use,
|
||||
)
|
||||
|
||||
import sglang as sgl
|
||||
|
||||
|
||||
class TestSRTBackend(unittest.TestCase):
    """Runs the shared sglang test programs against a local runtime endpoint."""

    # Stored on the class so the endpoint object is created only once.
    backend = None

    def setUp(self):
        klass = type(self)
        # Endpoint already created and registered by an earlier test method.
        if klass.backend is not None:
            return

        klass.backend = sgl.RuntimeEndpoint(base_url="http://localhost:30000")
        sgl.set_default_backend(klass.backend)

    # Inside the methods below, names such as ``test_select`` resolve to the
    # imported module-level programs, not to the methods themselves.

    def test_few_shot_qa(self):
        test_few_shot_qa()

    def test_mt_bench(self):
        test_mt_bench()

    def test_select(self):
        # Answer checking is switched off for this backend.
        test_select(check_answer=False)

    def test_decode_int(self):
        test_decode_int()

    def test_expert_answer(self):
        test_expert_answer()

    def test_tool_use(self):
        test_tool_use()

    def test_parallel_decoding(self):
        test_parallel_decoding()

    def test_stream(self):
        test_stream()

    def test_regex(self):
        test_regex()

    # Disabled test, kept for reference:
    # def test_parallel_encoding(self):
    #     test_parallel_encoding(check_answer=False)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every test case in this module with warnings silenced.
    unittest.main(warnings="ignore")

    # Disabled snippet for driving a single test by hand:
    # from sglang.global_config import global_config
    #
    # global_config.verbosity = 2
    # t = TestSRTBackend()
    # t.setUp()
    # t.test_regex()
|
||||
132
test/lang/test_tracing.py
Normal file
132
test/lang/test_tracing.py
Normal file
@@ -0,0 +1,132 @@
|
||||
import unittest
|
||||
|
||||
from sglang.backend.base_backend import BaseBackend
|
||||
from sglang.lang.chat_template import get_chat_template
|
||||
|
||||
import sglang as sgl
|
||||
|
||||
|
||||
class TestTracing(unittest.TestCase):
|
||||
def test_few_shot_qa(self):
|
||||
@sgl.function
|
||||
def few_shot_qa(s, question):
|
||||
s += "The following are questions with answers.\n\n"
|
||||
s += "Q: What is the capital of France?\n"
|
||||
s += "A: Paris\n"
|
||||
s += "Q: " + question + "\n"
|
||||
s += "A:" + sgl.gen("answer", stop="\n")
|
||||
|
||||
tracer = few_shot_qa.trace()
|
||||
print(tracer.last_node.print_graph_dfs() + "\n")
|
||||
|
||||
def test_select(self):
|
||||
@sgl.function
|
||||
def capital(s):
|
||||
s += "The capital of France is"
|
||||
s += sgl.select("capital", ["Paris. ", "London. "])
|
||||
s += "It is a city" + sgl.gen("description", stop=".")
|
||||
|
||||
tracer = capital.trace()
|
||||
print(tracer.last_node.print_graph_dfs() + "\n")
|
||||
|
||||
def test_raise_warning(self):
|
||||
@sgl.function
|
||||
def wrong(s, question):
|
||||
s += f"I want to ask {question}"
|
||||
|
||||
try:
|
||||
tracer = wrong.trace()
|
||||
raised = False
|
||||
except TypeError:
|
||||
raised = True
|
||||
|
||||
assert raised
|
||||
|
||||
def test_multi_function(self):
|
||||
@sgl.function
|
||||
def expand(s, tip):
|
||||
s += (
|
||||
"Please expand the following tip into a detailed paragraph:"
|
||||
+ tip
|
||||
+ "\n"
|
||||
)
|
||||
s += sgl.gen("detailed_tip")
|
||||
|
||||
@sgl.function
|
||||
def tip_suggestion(s, topic):
|
||||
s += "Here are 2 tips for " + topic + ".\n"
|
||||
|
||||
s += "1." + sgl.gen("tip_1", stop=["\n", ":", "."]) + "\n"
|
||||
s += "2." + sgl.gen("tip_2", stop=["\n", ":", "."]) + "\n"
|
||||
|
||||
branch1 = expand(tip=s["tip_1"])
|
||||
branch2 = expand(tip=s["tip_2"])
|
||||
|
||||
s += "Tip 1: " + branch1["detailed_tip"] + "\n"
|
||||
s += "Tip 2: " + branch2["detailed_tip"] + "\n"
|
||||
s += "In summary" + sgl.gen("summary")
|
||||
|
||||
compiled = tip_suggestion.compile()
|
||||
compiled.print_graph()
|
||||
|
||||
sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
|
||||
state = compiled.run(topic="staying healthy")
|
||||
print(state.text() + "\n")
|
||||
|
||||
states = compiled.run_batch(
|
||||
[
|
||||
{"topic": "staying healthy"},
|
||||
{"topic": "staying happy"},
|
||||
{"topic": "earning money"},
|
||||
],
|
||||
temperature=0,
|
||||
)
|
||||
for s in states:
|
||||
print(s.text() + "\n")
|
||||
|
||||
def test_role(self):
|
||||
@sgl.function
|
||||
def multi_turn_chat(s):
|
||||
s += sgl.user("Who are you?")
|
||||
s += sgl.assistant(sgl.gen("answer_1"))
|
||||
s += sgl.user("Who created you?")
|
||||
s += sgl.assistant(sgl.gen("answer_2"))
|
||||
|
||||
backend = BaseBackend()
|
||||
backend.chat_template = get_chat_template("llama-2-chat")
|
||||
|
||||
compiled = multi_turn_chat.compile(backend=backend)
|
||||
compiled.print_graph()
|
||||
|
||||
def test_fork(self):
|
||||
@sgl.function
|
||||
def tip_suggestion(s):
|
||||
s += (
|
||||
"Here are three tips for staying healthy: "
|
||||
"1. Balanced Diet; "
|
||||
"2. Regular Exercise; "
|
||||
"3. Adequate Sleep\n"
|
||||
)
|
||||
|
||||
forks = s.fork(3)
|
||||
for i in range(3):
|
||||
forks[i] += f"Now, expand tip {i+1} into a paragraph:\n"
|
||||
forks[i] += sgl.gen(f"detailed_tip")
|
||||
|
||||
s += "Tip 1:" + forks[0]["detailed_tip"] + "\n"
|
||||
s += "Tip 2:" + forks[1]["detailed_tip"] + "\n"
|
||||
s += "Tip 3:" + forks[2]["detailed_tip"] + "\n"
|
||||
s += "In summary" + sgl.gen("summary")
|
||||
|
||||
tracer = tip_suggestion.trace()
|
||||
print(tracer.last_node.print_graph_dfs())
|
||||
|
||||
a = tip_suggestion.run(backend=sgl.OpenAI("gpt-3.5-turbo-instruct"))
|
||||
print(a.text())
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run every test case in this module with warnings silenced.
    unittest.main(warnings="ignore")

    # Disabled snippet for driving a single test by hand:
    # t = TestTracing()
    # t.test_fork()
|
||||
Reference in New Issue
Block a user