release initial code

Co-authored-by: Ying Sheng <sqy1415@gmail.com> Co-authored-by: Liangsheng Yin <hnyls2002@gmail.com> Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu> Co-authored-by: parasol-aser <3848358+parasol-aser@users.noreply.github.com> Co-authored-by: LiviaSun <33578456+ChuyueSun@users.noreply.github.com> Co-authored-by: Cody Yu <hao.yu.cody@gmail.com>
2024-01-08 04:37:50 +00:00
parent f6d40df0ee
commit 22085081bb
145 changed files with 17802 additions and 2 deletions
--- a/test/lang/run_all.py
+++ b/test/lang/run_all.py
@@ -0,0 +1,60 @@
+import argparse
+import glob
+import multiprocessing
+import os
+import time
+import unittest
+
+from sglang.utils import run_with_timeout
+
+
+def _run_unittest_file(filename):
+    """Child-process entry point: run the unittest suite found in one file.
+
+    ``unittest.main`` finishes by calling ``sys.exit``, so the exit code of
+    the child process reports whether the tests in ``filename`` passed.
+    """
+    print(filename)
+    unittest.main(module=None, argv=["", "-vb", filename])
+
+
+def run_unittest_files(files, args):
+    """Run each test file in its own process, stopping at the first failure.
+
+    Args:
+        files: Iterable of paths to unittest files.
+        args: Parsed command-line arguments; only ``time_limit_per_file``
+            (seconds allowed per file) is read.
+
+    Returns:
+        True if every file exited with status 0 within its time limit,
+        False as soon as one file fails or times out.
+    """
+    for filename in files:
+        # A module-level target with explicit arguments stays picklable, so
+        # this also works with the "spawn" start method.
+        p = multiprocessing.Process(target=_run_unittest_file, args=(filename,))
+        p.start()
+        # Bound the wait directly instead of blocking in a helper thread.
+        p.join(timeout=args.time_limit_per_file)
+
+        if p.is_alive():
+            # Still running after the limit: kill it and report the timeout.
+            p.terminate()
+            time.sleep(5)
+            print(
+                f"\nTimeout after {args.time_limit_per_file} seconds "
+                f"when running {filename}"
+            )
+            return False
+
+        if p.exitcode != 0:
+            return False
+
+    return True
+
+
+if __name__ == "__main__":
+    # Script entry point: discover every test_*.py below the current working
+    # directory, run the files one at a time, and report through the exit code.
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "--time-limit-per-file",
+        type=int,
+        default=1000,
+        help="The time limit for running one file in seconds.",
+    )
+    args = arg_parser.parse_args()
+
+    # glob returns matches in arbitrary order; sort so runs are reproducible.
+    files = sorted(glob.glob("**/test_*.py", recursive=True))
+
+    tic = time.time()
+    success = run_unittest_files(files, args)
+    elapsed = time.time() - tic
+
+    if success:
+        print(f"Success. Time elapsed: {elapsed:.2f}s")
+    else:
+        print(f"Fail. Time elapsed: {elapsed:.2f}s")
+
+    # Raise SystemExit directly: the bare ``exit`` name is injected by the
+    # ``site`` module and is missing under ``python -S``.
+    raise SystemExit(0 if success else -1)
--- a/test/lang/test_anthropic_backend.py
+++ b/test/lang/test_anthropic_backend.py
@@ -0,0 +1,35 @@
+import json
+import unittest
+
+from sglang.test.test_programs import test_mt_bench, test_stream
+
+from sglang import Anthropic, set_default_backend
+
+
+class TestAnthropicBackend(unittest.TestCase):
+    """Run shared sglang test programs against the Anthropic backend."""
+
+    # Cached on the class so the backend is constructed only once.
+    backend = None
+    chat_backend = None
+
+    def setUp(self):
+        """Create the ``claude-2`` backend on first use and make it the default."""
+        owner = type(self)
+        if owner.backend is not None:
+            return
+
+        owner.backend = Anthropic("claude-2")
+        set_default_backend(owner.backend)
+
+    def test_mt_bench(self):
+        # Delegates to the program imported from sglang.test.test_programs.
+        test_mt_bench()
+
+    def test_stream(self):
+        # Delegates to the program imported from sglang.test.test_programs.
+        test_stream()
+
+
+if __name__ == "__main__":
+    # Run every test in this module with warnings ignored.
+    unittest.main(warnings="ignore")
+
+    # Manual debugging aid for running a single test directly.
+    # unittest.main() exits the interpreter when it finishes, so comment out
+    # the call above before enabling the snippet below.
+    # from sglang.global_config import global_config
+
+    # global_config.verbosity = 2
+    # t = TestAnthropicBackend()
+    # t.setUp()
+    # t.test_mt_bench()
--- a/test/lang/test_bind_pin.py
+++ b/test/lang/test_bind_pin.py
@@ -0,0 +1,54 @@
+import unittest
+
+from sglang.backend.runtime_endpoint import RuntimeEndpoint
+
+import sglang as sgl
+
+
+class TestBind(unittest.TestCase):
+    """Exercise ``bind``, ``trace``, ``pin`` and ``unpin`` on an sglang function."""
+
+    # Shared across tests; created lazily by setUp.
+    backend = None
+
+    def setUp(self):
+        """Create the shared runtime endpoint the first time a test runs."""
+        cls = type(self)
+
+        if cls.backend is None:
+            # Endpoint address is hard-coded to port 30000 on localhost.
+            cls.backend = RuntimeEndpoint(base_url="http://localhost:30000")
+
+    def test_bind(self):
+        # Bind ``prompt`` ahead of time, trace the partially bound function
+        # and print the resulting graph. Nothing is asserted: the test only
+        # checks that these calls do not raise.
+        @sgl.function
+        def few_shot_qa(s, prompt, question):
+            s += prompt
+            s += "Q: What is the capital of France?\n"
+            s += "A: Paris\n"
+            s += "Q: " + question + "\n"
+            s += "A:" + sgl.gen("answer", stop="\n")
+
+        few_shot_qa_2 = few_shot_qa.bind(
+            prompt="The following are questions with answers.\n\n"
+        )
+
+        tracer = few_shot_qa_2.trace()
+        print(tracer.last_node.print_graph_dfs() + "\n")
+
+    def test_pin(self):
+        # Bind ``prompt``, then pin and immediately unpin the bound function
+        # on the shared backend. Nothing is asserted here either.
+        # NOTE(review): presumably pin() caches the bound prefix on the
+        # server - confirm against the sglang API.
+        @sgl.function
+        def few_shot_qa(s, prompt, question):
+            s += prompt
+            s += "Q: What is the capital of France?\n"
+            s += "A: Paris\n"
+            s += "Q: " + question + "\n"
+            s += "A:" + sgl.gen("answer", stop="\n")
+
+        few_shot_qa_2 = few_shot_qa.bind(
+            prompt="Answer the following questions as if you were a 5-year-old kid.\n\n"
+        )
+        few_shot_qa_2.pin(self.backend)
+        few_shot_qa_2.unpin(self.backend)
+
+
+if __name__ == "__main__":
+    # Run every test in this module with warnings ignored.
+    unittest.main(warnings="ignore")
+
+    # Manual debugging aid for running a single test directly.
+    # unittest.main() exits the interpreter when it finishes, so comment out
+    # the call above before enabling the snippet below.
+    # t = TestBind()
+    # t.setUp()
+    # t.test_pin()
--- a/test/lang/test_openai_backend.py
+++ b/test/lang/test_openai_backend.py
@@ -0,0 +1,91 @@
+import unittest
+
+from sglang.test.test_programs import (
+    test_decode_int,
+    test_decode_json,
+    test_expert_answer,
+    test_few_shot_qa,
+    test_image_qa,
+    test_mt_bench,
+    test_parallel_decoding,
+    test_parallel_encoding,
+    test_react,
+    test_select,
+    test_stream,
+    test_tool_use,
+)
+
+from sglang import OpenAI, set_default_backend
+
+
+class TestOpenAIBackend(unittest.TestCase):
+    """Run shared sglang test programs against OpenAI models."""
+
+    # Backends are cached on the class so they are built once, not per test.
+    backend = None
+    chat_backend = None
+    chat_vision_backend = None
+
+    def setUp(self):
+        """Build the completion, chat and vision backends on first use."""
+        owner = type(self)
+        if owner.backend is not None:
+            return
+
+        owner.backend = OpenAI("gpt-3.5-turbo-instruct")
+        owner.chat_backend = OpenAI("gpt-3.5-turbo")
+        owner.chat_vision_backend = OpenAI("gpt-4-vision-preview")
+
+    def _run_program(self, backend, program, **kwargs):
+        """Make ``backend`` the default backend, then call ``program``."""
+        set_default_backend(backend)
+        program(**kwargs)
+
+    # Each test below selects a backend and runs the module-level program of
+    # the same name imported from sglang.test.test_programs.
+
+    def test_few_shot_qa(self):
+        self._run_program(self.backend, test_few_shot_qa)
+
+    def test_mt_bench(self):
+        self._run_program(self.chat_backend, test_mt_bench)
+
+    def test_select(self):
+        self._run_program(self.backend, test_select, check_answer=True)
+
+    def test_decode_int(self):
+        self._run_program(self.backend, test_decode_int)
+
+    def test_decode_json(self):
+        self._run_program(self.backend, test_decode_json)
+
+    def test_expert_answer(self):
+        self._run_program(self.backend, test_expert_answer)
+
+    def test_tool_use(self):
+        self._run_program(self.backend, test_tool_use)
+
+    def test_react(self):
+        self._run_program(self.backend, test_react)
+
+    def test_parallel_decoding(self):
+        self._run_program(self.backend, test_parallel_decoding)
+
+    def test_parallel_encoding(self):
+        self._run_program(self.backend, test_parallel_encoding)
+
+    def test_image_qa(self):
+        self._run_program(self.chat_vision_backend, test_image_qa)
+
+    def test_stream(self):
+        self._run_program(self.backend, test_stream)
+
+
+if __name__ == "__main__":
+    # Run every test in this module with warnings ignored.
+    unittest.main(warnings="ignore")
+
+    # Manual debugging aid for running a single test directly.
+    # unittest.main() exits the interpreter when it finishes, so comment out
+    # the call above before enabling the snippet below.
+    # from sglang.global_config import global_config
+
+    # global_config.verbosity = 2
+    # t = TestOpenAIBackend()
+    # t.setUp()
+    # t.test_decode_json()
--- a/test/lang/test_srt_backend.py
+++ b/test/lang/test_srt_backend.py
@@ -0,0 +1,74 @@
+"""
+python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
+"""
+import json
+import unittest
+
+from sglang.test.test_programs import (
+    test_decode_int,
+    test_decode_json,
+    test_expert_answer,
+    test_few_shot_qa,
+    test_mt_bench,
+    test_parallel_decoding,
+    test_parallel_encoding,
+    test_react,
+    test_regex,
+    test_select,
+    test_stream,
+    test_tool_use,
+)
+
+import sglang as sgl
+
+
+class TestSRTBackend(unittest.TestCase):
+    """Run shared sglang test programs against a runtime endpoint.
+
+    The endpoint address is hard-coded to http://localhost:30000.
+    """
+
+    # Cached on the class so the endpoint is created only once.
+    backend = None
+
+    def setUp(self):
+        """Create the endpoint on first use and make it the default backend."""
+        owner = type(self)
+        if owner.backend is not None:
+            return
+
+        owner.backend = sgl.RuntimeEndpoint(base_url="http://localhost:30000")
+        sgl.set_default_backend(owner.backend)
+
+    # Each test below runs the module-level program of the same name imported
+    # from sglang.test.test_programs against the default backend.
+
+    def test_few_shot_qa(self):
+        test_few_shot_qa()
+
+    def test_mt_bench(self):
+        test_mt_bench()
+
+    def test_select(self):
+        # Answers are not checked for this backend.
+        test_select(check_answer=False)
+
+    def test_decode_int(self):
+        test_decode_int()
+
+    def test_expert_answer(self):
+        test_expert_answer()
+
+    def test_tool_use(self):
+        test_tool_use()
+
+    def test_parallel_decoding(self):
+        test_parallel_decoding()
+
+    def test_stream(self):
+        test_stream()
+
+    def test_regex(self):
+        test_regex()
+
+    # Currently disabled:
+    # def test_parallel_encoding(self):
+    #     test_parallel_encoding(check_answer=False)
+
+
+if __name__ == "__main__":
+    # Run every test in this module with warnings ignored.
+    unittest.main(warnings="ignore")
+
+    # Manual debugging aid for running a single test directly.
+    # unittest.main() exits the interpreter when it finishes, so comment out
+    # the call above before enabling the snippet below.
+    # from sglang.global_config import global_config
+
+    # global_config.verbosity = 2
+    # t = TestSRTBackend()
+    # t.setUp()
+    # t.test_regex()
--- a/test/lang/test_tracing.py
+++ b/test/lang/test_tracing.py
@@ -0,0 +1,132 @@
+import unittest
+
+from sglang.backend.base_backend import BaseBackend
+from sglang.lang.chat_template import get_chat_template
+
+import sglang as sgl
+
+
+class TestTracing(unittest.TestCase):
+    """Exercise tracing and compilation of sglang functions.
+
+    Most tests only print the traced or compiled graph and assert nothing, so
+    they pass as long as no call raises. ``test_multi_function`` and
+    ``test_fork`` additionally execute against an OpenAI backend.
+    """
+
+    def test_few_shot_qa(self):
+        # Trace a function with an unbound ``question`` argument and print
+        # the resulting graph.
+        @sgl.function
+        def few_shot_qa(s, question):
+            s += "The following are questions with answers.\n\n"
+            s += "Q: What is the capital of France?\n"
+            s += "A: Paris\n"
+            s += "Q: " + question + "\n"
+            s += "A:" + sgl.gen("answer", stop="\n")
+
+        tracer = few_shot_qa.trace()
+        print(tracer.last_node.print_graph_dfs() + "\n")
+
+    def test_select(self):
+        # Trace a function that mixes ``select`` and ``gen``.
+        @sgl.function
+        def capital(s):
+            s += "The capital of France is"
+            s += sgl.select("capital", ["Paris. ", "London. "])
+            s += "It is a city" + sgl.gen("description", stop=".")
+
+        tracer = capital.trace()
+        print(tracer.last_node.print_graph_dfs() + "\n")
+
+    def test_raise_warning(self):
+        @sgl.function
+        def wrong(s, question):
+            s += f"I want to ask {question}"
+
+        # Tracing ``wrong`` is expected to raise TypeError.
+        # NOTE(review): presumably because the unbound ``question``
+        # placeholder is interpolated inside an f-string - confirm.
+        # assertRaises is used instead of a bare ``assert`` so the check
+        # still runs under ``python -O``.
+        with self.assertRaises(TypeError):
+            wrong.trace()
+
+    def test_multi_function(self):
+        # Compile a function that calls another sglang function twice, print
+        # the compiled graph, then run it once and as a batch.
+        @sgl.function
+        def expand(s, tip):
+            s += (
+                "Please expand the following tip into a detailed paragraph:"
+                + tip
+                + "\n"
+            )
+            s += sgl.gen("detailed_tip")
+
+        @sgl.function
+        def tip_suggestion(s, topic):
+            s += "Here are 2 tips for " + topic + ".\n"
+
+            s += "1." + sgl.gen("tip_1", stop=["\n", ":", "."]) + "\n"
+            s += "2." + sgl.gen("tip_2", stop=["\n", ":", "."]) + "\n"
+
+            branch1 = expand(tip=s["tip_1"])
+            branch2 = expand(tip=s["tip_2"])
+
+            s += "Tip 1: " + branch1["detailed_tip"] + "\n"
+            s += "Tip 2: " + branch2["detailed_tip"] + "\n"
+            s += "In summary" + sgl.gen("summary")
+
+        compiled = tip_suggestion.compile()
+        compiled.print_graph()
+
+        # From here on the test runs against the OpenAI backend.
+        sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
+        state = compiled.run(topic="staying healthy")
+        print(state.text() + "\n")
+
+        states = compiled.run_batch(
+            [
+                {"topic": "staying healthy"},
+                {"topic": "staying happy"},
+                {"topic": "earning money"},
+            ],
+            temperature=0,
+        )
+        for s in states:
+            print(s.text() + "\n")
+
+    def test_role(self):
+        # Compile a multi-turn chat against a bare BaseBackend that only
+        # carries the "llama-2-chat" chat template, then print the graph.
+        @sgl.function
+        def multi_turn_chat(s):
+            s += sgl.user("Who are you?")
+            s += sgl.assistant(sgl.gen("answer_1"))
+            s += sgl.user("Who created you?")
+            s += sgl.assistant(sgl.gen("answer_2"))
+
+        backend = BaseBackend()
+        backend.chat_template = get_chat_template("llama-2-chat")
+
+        compiled = multi_turn_chat.compile(backend=backend)
+        compiled.print_graph()
+
+    def test_fork(self):
+        # Trace a function that forks the state into three branches, print
+        # the graph, then run the function against the OpenAI backend.
+        @sgl.function
+        def tip_suggestion(s):
+            s += (
+                "Here are three tips for staying healthy: "
+                "1. Balanced Diet; "
+                "2. Regular Exercise; "
+                "3. Adequate Sleep\n"
+            )
+
+            forks = s.fork(3)
+            for i in range(3):
+                forks[i] += f"Now, expand tip {i+1} into a paragraph:\n"
+                forks[i] += sgl.gen("detailed_tip")
+
+            s += "Tip 1:" + forks[0]["detailed_tip"] + "\n"
+            s += "Tip 2:" + forks[1]["detailed_tip"] + "\n"
+            s += "Tip 3:" + forks[2]["detailed_tip"] + "\n"
+            s += "In summary" + sgl.gen("summary")
+
+        tracer = tip_suggestion.trace()
+        print(tracer.last_node.print_graph_dfs())
+
+        a = tip_suggestion.run(backend=sgl.OpenAI("gpt-3.5-turbo-instruct"))
+        print(a.text())
+
+
+if __name__ == "__main__":
+    # Run every test in this module with warnings ignored.
+    unittest.main(warnings="ignore")
+
+    # Manual debugging aid for running a single test directly.
+    # unittest.main() exits the interpreter when it finishes, so comment out
+    # the call above before enabling the snippet below.
+    # t = TestTracing()
+    # t.test_fork()