adapt to sglang v0.5.2rc1 on dcu

2025-09-04 15:56:33 +08:00
commit 909abb58f5
2320 changed files with 489411 additions and 0 deletions
--- a/test/lang/example_image.png
+++ b/test/lang/example_image.png
--- a/test/lang/run_suite.py
+++ b/test/lang/run_suite.py
@@ -0,0 +1,50 @@
+"""Run a named test suite, or every test_*.py file found by glob.
+
+Usage:
+python3 run_suite.py --suite per-commit
+python3 run_suite.py --suite all
+"""
+
+import argparse
+import glob
+
+from sglang.test.test_utils import TestFile, run_unittest_files
+
+# Named suites: each maps a suite name to the TestFile entries it runs.
+suites = {
+    "per-commit": [
+        TestFile("test_srt_backend.py"),
+        # Skip this due to some OPENAI_API_KEY issues
+        # "test_openai_backend.py",
+    ],
+}
+
+
+if __name__ == "__main__":
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "--timeout-per-file",
+        type=int,
+        default=1000,
+        help="The time limit for running one file in seconds.",
+    )
+    arg_parser.add_argument(
+        "--suite",
+        type=str,
+        default=next(iter(suites)),
+        choices=[*suites, "all"],
+        help="The suite to run",
+    )
+    args = arg_parser.parse_args()
+
+    if args.suite == "all":
+        # The named suites hold TestFile entries, so wrap the globbed paths the
+        # same way; sort them because glob does not guarantee an order.
+        test_paths = sorted(glob.glob("**/test_*.py", recursive=True))
+        files = [TestFile(path) for path in test_paths]
+    else:
+        files = suites[args.suite]
+
+    exit_code = run_unittest_files(files, args.timeout_per_file)
+    # `exit` is only injected by the site module; SystemExit is always available.
+    raise SystemExit(exit_code)
--- a/test/lang/test_anthropic_backend.py
+++ b/test/lang/test_anthropic_backend.py
@@ -0,0 +1,28 @@
+import json
+import unittest
+
+from sglang import Anthropic, set_default_backend
+from sglang.test.test_programs import test_mt_bench, test_stream
+from sglang.test.test_utils import CustomTestCase
+
+
+class TestAnthropicBackend(CustomTestCase):
+    # Shared backend, created once in setUpClass.
+    backend = None
+
+    @classmethod
+    def setUpClass(cls):
+        # Build the backend and make it the default for the shared test programs.
+        anthropic_backend = Anthropic("claude-3-haiku-20240307")
+        cls.backend = anthropic_backend
+        set_default_backend(anthropic_backend)
+
+    def test_mt_bench(self):
+        test_mt_bench()
+
+    def test_stream(self):
+        test_stream()
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/test/lang/test_bind_cache.py
+++ b/test/lang/test_bind_cache.py
@@ -0,0 +1,55 @@
+import unittest
+
+import sglang as sgl
+from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, CustomTestCase
+
+
+class TestBind(CustomTestCase):
+    # Runtime shared by every test; started in setUpClass, stopped in tearDownClass.
+    backend = None
+
+    @classmethod
+    def setUpClass(cls):
+        runtime = sgl.Runtime(model_path=DEFAULT_MODEL_NAME_FOR_TEST)
+        cls.backend = runtime
+        sgl.set_default_backend(runtime)
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.backend.shutdown()
+
+    def test_bind(self):
+        # Bind only `prompt`, then trace the bound function and print its graph.
+        @sgl.function
+        def few_shot_qa(s, prompt, question):
+            s += prompt
+            s += "Q: What is the capital of France?\n"
+            s += "A: Paris\n"
+            s += "Q: " + question + "\n"
+            s += "A:" + sgl.gen("answer", stop="\n")
+
+        intro = "The following are questions with answers.\n\n"
+        bound_qa = few_shot_qa.bind(prompt=intro)
+
+        graph_dump = bound_qa.trace().last_node.print_graph_dfs()
+        print(graph_dump + "\n")
+
+    def test_cache(self):
+        # Bind only `prompt`, then call cache() on the bound function.
+        @sgl.function
+        def few_shot_qa(s, prompt, question):
+            s += prompt
+            s += "Q: What is the capital of France?\n"
+            s += "A: Paris\n"
+            s += "Q: " + question + "\n"
+            s += "A:" + sgl.gen("answer", stop="\n")
+
+        kid_intro = (
+            "Answer the following questions as if you were a 5-year-old kid.\n\n"
+        )
+        bound_qa = few_shot_qa.bind(prompt=kid_intro)
+        bound_qa.cache(self.backend)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/test/lang/test_choices.py
+++ b/test/lang/test_choices.py
@@ -0,0 +1,99 @@
+import unittest
+
+import numpy as np
+
+from sglang.lang.choices import (
+    greedy_token_selection,
+    token_length_normalized,
+    unconditional_likelihood_normalized,
+)
+from sglang.test.test_utils import CustomTestCase
+
+# Shared fixture: three candidate choices and their per-choice logprob data.
+# NOTE(review): each token entry looks like [logprob, token_id, token_text] -- confirm.
+MOCK_CHOICES_INPUT_DATA = {
+    "choices": [
+        "organ",  # ["organ"]
+        "organism",  # ["organ", "ism"]
+        "antidisestablishmentarianism",  # ["ant", "id", "is", "est", "ablish", "ment", "arian", "ism"]
+    ],
+    "normalized_prompt_logprobs": [-0.1, -0.2, -0.05],
+    "input_token_logprobs": [
+        [[-0.1, 1, None]],
+        [[-0.1, 1, None], [-0.3, 2, None]],
+        [
+            [-0.4, 3, None],
+            [-0.25, 4, None],
+            [-0.1, 5, None],
+            [-0.01, 6, None],
+            [-0.01, 7, None],
+            [-0.01, 8, None],
+            [-0.01, 9, None],
+            [-0.01, 2, None],
+        ],
+    ],
+    "output_token_logprobs": [
+        [[-0.1, 10, None]],
+        [[-0.1, 10, None]],
+        [[-0.1, 10, None]],
+    ],
+    "unconditional_token_logprobs": [
+        [[None, 1, None]],
+        [[None, 1, None], [-1.4, 2, None]],
+        [
+            [None, 3, None],
+            [-0.25, 4, None],
+            [-0.1, 5, None],
+            [-0.01, 6, None],
+            [-0.01, 7, None],
+            [-0.01, 8, None],
+            [-0.01, 9, None],
+            [-0.01, 2, None],
+        ],
+    ],
+}
+
+
+class TestChoices(CustomTestCase):
+
+    def test_token_length_normalized(self):
+        """Confirm 'antidisestablishmentarianism' is selected due to high confidences for
+        its later tokens resulting in highest token length normalized prompt logprob."""
+        decision = token_length_normalized(**MOCK_CHOICES_INPUT_DATA)
+        self.assertEqual(decision.decision, "antidisestablishmentarianism")
+
+    def test_greedy_token_selection(self):
+        """Confirm 'organ' is selected due to it having the joint highest initial token
+        logprob, and a higher average logprob than organism's second token."""
+        decision = greedy_token_selection(**MOCK_CHOICES_INPUT_DATA)
+        self.assertEqual(decision.decision, "organ")
+        expected_matrix = [
+            [-0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1],
+            [-0.1, -0.3, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2],
+            [-0.4, -0.25, -0.1, -0.01, -0.01, -0.01, -0.01, -0.01],
+        ]
+        # np.allclose is wrapped in assertTrue so the check survives `python -O`.
+        self.assertTrue(
+            np.allclose(
+                decision.meta_info["greedy_logprob_matrix"],
+                expected_matrix,
+                atol=0.01,
+            )
+        )
+
+    def test_unconditional_likelihood_normalized(self):
+        """Confirm 'organism' is selected due to it having the highest average token logprob
+        once normalized by the unconditional token logprobs."""
+        decision = unconditional_likelihood_normalized(**MOCK_CHOICES_INPUT_DATA)
+        self.assertEqual(decision.decision, "organism")
+        self.assertTrue(
+            np.allclose(
+                decision.meta_info["normalized_unconditional_prompt_logprobs"],
+                [-0.1, 0.5, -0.05],
+                atol=0.01,
+            )
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/test/lang/test_litellm_backend.py
+++ b/test/lang/test_litellm_backend.py
@@ -0,0 +1,29 @@
+import json
+import unittest
+
+from sglang import LiteLLM, set_default_backend
+from sglang.test.test_programs import test_mt_bench, test_stream
+from sglang.test.test_utils import CustomTestCase
+
+
+# NOTE(review): the name says "Anthropic" but this class drives the LiteLLM backend.
+# It is kept unchanged so existing test selectors still resolve.
+class TestAnthropicBackend(CustomTestCase):
+    # Shared chat backend, created once in setUpClass.
+    chat_backend = None
+
+    @classmethod
+    def setUpClass(cls):
+        litellm_backend = LiteLLM("gpt-3.5-turbo")
+        cls.chat_backend = litellm_backend
+        set_default_backend(litellm_backend)
+
+    def test_mt_bench(self):
+        test_mt_bench()
+
+    def test_stream(self):
+        test_stream()
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/test/lang/test_openai_backend.py
+++ b/test/lang/test_openai_backend.py
@@ -0,0 +1,84 @@
+import unittest
+
+from sglang import OpenAI, set_default_backend
+from sglang.test.test_programs import (
+    test_chat_completion_speculative,
+    test_completion_speculative,
+    test_decode_int,
+    test_decode_json,
+    test_expert_answer,
+    test_few_shot_qa,
+    test_image_qa,
+    test_mt_bench,
+    test_parallel_decoding,
+    test_parallel_encoding,
+    test_react,
+    test_select,
+    test_stream,
+    test_tool_use,
+)
+from sglang.test.test_utils import CustomTestCase
+
+
+class TestOpenAIBackend(CustomTestCase):
+    # Backends are created once in setUpClass; each test selects the one it needs.
+    instruct_backend = None
+    chat_backend = None
+    chat_vision_backend = None
+
+    @classmethod
+    def setUpClass(cls):
+        cls.instruct_backend = OpenAI("gpt-3.5-turbo-instruct")
+        cls.chat_backend = OpenAI("gpt-3.5-turbo")
+        cls.chat_vision_backend = OpenAI("gpt-4-turbo")
+
+    def _run_with(self, backend, program, **program_kwargs):
+        """Make `backend` the default backend, then call `program`."""
+        set_default_backend(backend)
+        program(**program_kwargs)
+
+    def test_few_shot_qa(self):
+        self._run_with(self.instruct_backend, test_few_shot_qa)
+
+    def test_mt_bench(self):
+        self._run_with(self.chat_backend, test_mt_bench)
+
+    def test_select(self):
+        self._run_with(self.instruct_backend, test_select, check_answer=True)
+
+    def test_decode_int(self):
+        self._run_with(self.instruct_backend, test_decode_int)
+
+    def test_decode_json(self):
+        self._run_with(self.instruct_backend, test_decode_json)
+
+    def test_expert_answer(self):
+        self._run_with(self.instruct_backend, test_expert_answer)
+
+    def test_tool_use(self):
+        self._run_with(self.instruct_backend, test_tool_use)
+
+    def test_react(self):
+        self._run_with(self.instruct_backend, test_react)
+
+    def test_parallel_decoding(self):
+        self._run_with(self.instruct_backend, test_parallel_decoding)
+
+    def test_parallel_encoding(self):
+        self._run_with(self.instruct_backend, test_parallel_encoding)
+
+    def test_image_qa(self):
+        self._run_with(self.chat_vision_backend, test_image_qa)
+
+    def test_stream(self):
+        self._run_with(self.instruct_backend, test_stream)
+
+    def test_completion_speculative(self):
+        self._run_with(self.instruct_backend, test_completion_speculative)
+
+    def test_chat_completion_speculative(self):
+        self._run_with(self.chat_backend, test_chat_completion_speculative)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/test/lang/test_separate_reasoning.py
+++ b/test/lang/test_separate_reasoning.py
@@ -0,0 +1,66 @@
+"""
+Tests for the separate_reasoning functionality in sglang.
+
+Usage:
+python3 -m unittest test/lang/test_separate_reasoning.py
+"""
+
+import unittest
+
+from sglang import assistant, gen, separate_reasoning, user
+from sglang.lang.ir import SglExprList, SglSeparateReasoning
+from sglang.test.test_utils import CustomTestCase
+
+
+class TestSeparateReasoning(CustomTestCase):
+    def test_separate_reasoning_creation(self):
+        """Test that SglSeparateReasoning objects are created correctly."""
+        source_gen = gen("test")
+
+        # A valid model type wraps the gen expression in a two-element list.
+        wrapped = separate_reasoning(source_gen, model_type="deepseek-r1")
+        self.assertIsInstance(wrapped, SglExprList)
+        self.assertEqual(len(wrapped.expr_list), 2)
+        self.assertEqual(wrapped.expr_list[0], source_gen)
+        reasoning_part = wrapped.expr_list[1]
+        self.assertIsInstance(reasoning_part, SglSeparateReasoning)
+        self.assertEqual(reasoning_part.model_type, "deepseek-r1")
+        self.assertEqual(reasoning_part.name, "test_reasoning_content")
+
+        # A second valid model type is carried through unchanged.
+        wrapped = separate_reasoning(source_gen, model_type="qwen3")
+        self.assertIsInstance(wrapped, SglExprList)
+        self.assertEqual(wrapped.expr_list[1].model_type, "qwen3")
+
+    def test_separate_reasoning_name_processing(self):
+        """Test that separate_reasoning correctly processes names."""
+        reasoning_part = separate_reasoning(
+            gen("test_var"), model_type="deepseek-r1"
+        ).expr_list[1]
+        self.assertEqual(reasoning_part.name, "test_var_reasoning_content")
+
+        # Check the process_name_for_reasoning method directly.
+        derived = reasoning_part.process_name_for_reasoning("another_var")
+        self.assertEqual(derived, "another_var_reasoning_content")
+
+    def test_separate_reasoning_repr(self):
+        """Test the string representation of SglSeparateReasoning."""
+        reasoning_part = separate_reasoning(
+            gen("test_var"), model_type="deepseek-r1"
+        ).expr_list[1]
+        expected_repr = (
+            "SeparateReasoning(model_type=deepseek-r1, name=test_var_reasoning_content)"
+        )
+        self.assertEqual(repr(reasoning_part), expected_repr)
+
+    def test_separate_reasoning_with_invalid_model_type(self):
+        """Test that separate_reasoning accepts any model type during creation."""
+        # Creation does not reject the model type here; per the original note, the
+        # actual validation happens in the ReasoningParser constructor.
+        wrapped = separate_reasoning(gen("test"), model_type="invalid-model")
+        self.assertIsInstance(wrapped, SglExprList)
+        self.assertEqual(wrapped.expr_list[1].model_type, "invalid-model")
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/test/lang/test_separate_reasoning_execution.py
+++ b/test/lang/test_separate_reasoning_execution.py
@@ -0,0 +1,193 @@
+"""
+Tests for the execution of separate_reasoning functionality in sglang.
+
+Usage:
+python3 -m unittest test/lang/test_separate_reasoning_execution.py
+"""
+
+import threading
+import time
+import unittest
+from unittest.mock import MagicMock, patch
+
+from sglang import assistant, gen, separate_reasoning, user
+from sglang.lang.interpreter import StreamExecutor
+from sglang.lang.ir import SglGen, SglSeparateReasoning
+from sglang.test.test_utils import CustomTestCase
+
+
+def create_daemon_event():
+    """Return a new, unset threading.Event for a test to track and release."""
+    return threading.Event()
+
+
+class MockReasoningParser:
+    """Stand-in parser that records which parse method was called."""
+
+    def __init__(self, model_type):
+        self.model_type = model_type
+        self.parse_non_stream_called = False
+        self.parse_stream_chunk_called = False
+
+    def _tag(self, text):
+        # Fake "parsing": prefix the text to mark the reasoning and normal parts.
+        reasoning = f"[REASONING from {self.model_type}]: {text}"
+        normal_text = f"[NORMAL from {self.model_type}]: {text}"
+        return reasoning, normal_text
+
+    def parse_non_stream(self, full_text):
+        self.parse_non_stream_called = True
+        return self._tag(full_text)
+
+    def parse_stream_chunk(self, chunk_text):
+        self.parse_stream_chunk_called = True
+        return self._tag(chunk_text)
+
+
+class TestSeparateReasoningExecution(CustomTestCase):
+    def setUp(self):
+        """Set up for the test."""
+        super().setUp()
+        # Store any events created during the test
+        self.events = []
+
+    def tearDown(self):
+        """Clean up any threads that might have been created during the test."""
+        super().tearDown()
+
+        # Set all events to ensure any waiting threads are released
+        for event in self.events:
+            event.set()
+
+    @patch("sglang.srt.parser.reasoning_parser.ReasoningParser")
+    def test_execute_separate_reasoning(self, mock_parser_class):
+        """Test that _execute_separate_reasoning correctly calls the ReasoningParser."""
+        # Setup mock parser
+        mock_parser = MockReasoningParser("deepseek-r1")
+        mock_parser_class.return_value = mock_parser
+
+        # Create a mock backend to avoid AttributeError in __del__
+        mock_backend = MagicMock()
+
+        # Create a StreamExecutor with necessary setup
+        executor = StreamExecutor(
+            backend=mock_backend,
+            arguments={},
+            default_sampling_para={},
+            chat_template={
+                "role_map": {"user": "user", "assistant": "assistant"}
+            },  # Simple chat template
+            stream=False,
+            use_thread=False,
+        )
+
+        # Set up the executor with a variable and its value
+        var_name = "test_var"
+        reasoning_name = f"{var_name}_reasoning_content"
+        var_value = "Test content"
+        executor.variables = {var_name: var_value}
+
+        # Create events and track them for cleanup
+        var_event = create_daemon_event()
+        reasoning_event = create_daemon_event()
+        self.events.extend([var_event, reasoning_event])
+
+        executor.variable_event = {var_name: var_event, reasoning_name: reasoning_event}
+        executor.variable_event[var_name].set()  # Mark as ready
+
+        # Set up the current role
+        executor.cur_role = "assistant"
+        executor.cur_role_begin_pos = 0
+        executor.text_ = var_value
+
+        # Create a gen expression and a separate_reasoning expression
+        gen_expr = SglGen(var_name)
+        expr = SglSeparateReasoning("deepseek-r1", expr=gen_expr)
+
+        # Execute separate_reasoning
+        executor._execute_separate_reasoning(expr)
+
+        # Verify that the parser was created with the correct model type
+        mock_parser_class.assert_called_once_with("deepseek-r1")
+
+        # Verify that parse_non_stream was called
+        self.assertTrue(mock_parser.parse_non_stream_called)
+
+        # Verify that the variables were updated correctly
+        self.assertIn(reasoning_name, executor.variables)
+        self.assertEqual(
+            executor.variables[reasoning_name],
+            f"[REASONING from deepseek-r1]: {var_value}",
+        )
+        self.assertEqual(
+            executor.variables[var_name], f"[NORMAL from deepseek-r1]: {var_value}"
+        )
+
+        # Verify that the variable event was set
+        self.assertIn(reasoning_name, executor.variable_event)
+        self.assertTrue(executor.variable_event[reasoning_name].is_set())
+
+        # Verify that the text was updated
+        self.assertEqual(executor.text_, f"[NORMAL from deepseek-r1]: {var_value}")
+
+    @patch("sglang.srt.parser.reasoning_parser.ReasoningParser")
+    def test_reasoning_parser_integration(self, mock_parser_class):
+        """Test the integration between separate_reasoning and ReasoningParser."""
+        # Setup mock parsers for different model types
+        deepseek_parser = MockReasoningParser("deepseek-r1")
+        qwen_parser = MockReasoningParser("qwen3")
+
+        # Configure the mock to return different parsers based on model type
+        def get_parser(model_type):
+            if model_type == "deepseek-r1":
+                return deepseek_parser
+            elif model_type == "qwen3":
+                return qwen_parser
+            else:
+                raise ValueError(f"Unsupported model type: {model_type}")
+
+        mock_parser_class.side_effect = get_parser
+
+        # Look the parsers up through the patched class so get_parser is exercised
+        self.assertIs(mock_parser_class("deepseek-r1"), deepseek_parser)
+        self.assertIs(mock_parser_class("qwen3"), qwen_parser)
+
+        # Test with DeepSeek-R1 model
+        test_text = "This is a test"
+        reasoning, normal_text = deepseek_parser.parse_non_stream(test_text)
+
+        self.assertEqual(reasoning, f"[REASONING from deepseek-r1]: {test_text}")
+        self.assertEqual(normal_text, f"[NORMAL from deepseek-r1]: {test_text}")
+
+        # Test with Qwen3 model
+        reasoning, normal_text = qwen_parser.parse_non_stream(test_text)
+
+        self.assertEqual(reasoning, f"[REASONING from qwen3]: {test_text}")
+        self.assertEqual(normal_text, f"[NORMAL from qwen3]: {test_text}")
+
+    @patch("sglang.srt.parser.reasoning_parser.ReasoningParser")
+    def test_reasoning_parser_invalid_model(self, mock_parser_class):
+        """Test that ReasoningParser raises an error for invalid model types."""
+
+        # Configure the mock to raise an error for invalid model types
+        def get_parser(model_type):
+            if model_type in ["deepseek-r1", "qwen3"]:
+                return MockReasoningParser(model_type)
+            elif model_type is None:
+                raise ValueError("Model type must be specified")
+            else:
+                raise ValueError(f"Unsupported model type: {model_type}")
+
+        mock_parser_class.side_effect = get_parser
+
+        with self.assertRaises(ValueError) as context:
+            mock_parser_class("invalid-model")
+        self.assertIn("Unsupported model type", str(context.exception))
+
+        with self.assertRaises(ValueError) as context:
+            mock_parser_class(None)
+        self.assertIn("Model type must be specified", str(context.exception))
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/test/lang/test_srt_backend.py
+++ b/test/lang/test_srt_backend.py
@@ -0,0 +1,88 @@
+"""
+Usage:
+python3 -m unittest test_srt_backend.TestSRTBackend.test_gen_min_new_tokens
+python3 -m unittest test_srt_backend.TestSRTBackend.test_hellaswag_select
+"""
+
+import unittest
+
+import sglang as sgl
+from sglang.test.test_programs import (
+    test_decode_int,
+    test_decode_json_regex,
+    test_dtype_gen,
+    test_expert_answer,
+    test_few_shot_qa,
+    test_gen_min_new_tokens,
+    test_hellaswag_select,
+    test_mt_bench,
+    test_parallel_decoding,
+    test_regex,
+    test_select,
+    test_stream,
+    test_tool_use,
+)
+from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, CustomTestCase
+
+
+class TestSRTBackend(CustomTestCase):
+    # Runtime shared by every test; started in setUpClass, stopped in tearDownClass.
+    backend = None
+
+    @classmethod
+    def setUpClass(cls):
+        runtime = sgl.Runtime(
+            model_path=DEFAULT_MODEL_NAME_FOR_TEST, cuda_graph_max_bs=4
+        )
+        cls.backend = runtime
+        sgl.set_default_backend(runtime)
+
+    @classmethod
+    def tearDownClass(cls):
+        cls.backend.shutdown()
+
+    def test_few_shot_qa(self):
+        test_few_shot_qa()
+
+    def test_mt_bench(self):
+        test_mt_bench()
+
+    def test_select(self):
+        test_select(check_answer=False)
+
+    def test_decode_int(self):
+        test_decode_int()
+
+    def test_decode_json_regex(self):
+        test_decode_json_regex()
+
+    def test_expert_answer(self):
+        test_expert_answer()
+
+    def test_tool_use(self):
+        test_tool_use()
+
+    def test_parallel_decoding(self):
+        test_parallel_decoding()
+
+    def test_stream(self):
+        test_stream()
+
+    def test_regex(self):
+        test_regex()
+
+    def test_dtype_gen(self):
+        test_dtype_gen()
+
+    def test_hellaswag_select(self):
+        # Run twice to capture more bugs
+        for _attempt in range(2):
+            accuracy, _latency = test_hellaswag_select()
+            self.assertGreater(accuracy, 0.60)
+
+    def test_gen_min_new_tokens(self):
+        test_gen_min_new_tokens()
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/test/lang/test_tracing.py
+++ b/test/lang/test_tracing.py
@@ -0,0 +1,126 @@
+import unittest
+
+import sglang as sgl
+from sglang.lang.backend.base_backend import BaseBackend
+from sglang.lang.chat_template import get_chat_template
+from sglang.test.test_utils import CustomTestCase
+
+
+class TestTracing(CustomTestCase):
+    def test_few_shot_qa(self):
+        @sgl.function
+        def few_shot_qa(s, question):
+            s += "The following are questions with answers.\n\n"
+            s += "Q: What is the capital of France?\n"
+            s += "A: Paris\n"
+            s += "Q: " + question + "\n"
+            s += "A:" + sgl.gen("answer", stop="\n")
+
+        tracer = few_shot_qa.trace()
+        # print(tracer.last_node.print_graph_dfs() + "\n")
+
+    def test_select(self):
+        @sgl.function
+        def capital(s):
+            s += "The capital of France is"
+            s += sgl.select("capital", ["Paris. ", "London. "])
+            s += "It is a city" + sgl.gen("description", stop=".")
+
+        tracer = capital.trace()
+        # print(tracer.last_node.print_graph_dfs() + "\n")
+
+    def test_raise_warning(self):
+        @sgl.function
+        def wrong(s, question):
+            s += f"I want to ask {question}"
+
+        # wrong.trace() is expected to raise TypeError. assertRaises keeps the check
+        # active under `python -O`, where a bare assert would be stripped.
+        with self.assertRaises(TypeError):
+            wrong.trace()
+
+    def test_multi_function(self):
+        @sgl.function
+        def expand(s, tip):
+            s += (
+                "Please expand the following tip into a detailed paragraph:"
+                + tip
+                + "\n"
+            )
+            s += sgl.gen("detailed_tip")
+
+        @sgl.function
+        def tip_suggestion(s, topic):
+            s += "Here are 2 tips for " + topic + ".\n"
+
+            s += "1." + sgl.gen("tip_1", stop=["\n", ":", "."]) + "\n"
+            s += "2." + sgl.gen("tip_2", stop=["\n", ":", "."]) + "\n"
+
+            branch1 = expand(tip=s["tip_1"])
+            branch2 = expand(tip=s["tip_2"])
+
+            s += "Tip 1: " + branch1["detailed_tip"] + "\n"
+            s += "Tip 2: " + branch2["detailed_tip"] + "\n"
+            s += "In summary" + sgl.gen("summary")
+
+        compiled = tip_suggestion.compile()
+        # compiled.print_graph()
+
+        sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
+        state = compiled.run(topic="staying healthy")
+        # print(state.text() + "\n")
+
+        states = compiled.run_batch(
+            [
+                {"topic": "staying healthy"},
+                {"topic": "staying happy"},
+                {"topic": "earning money"},
+            ],
+            temperature=0,
+        )
+        # for s in states:
+        #     print(s.text() + "\n")
+
+    def test_role(self):
+        @sgl.function
+        def multi_turn_chat(s):
+            s += sgl.user("Who are you?")
+            s += sgl.assistant(sgl.gen("answer_1"))
+            s += sgl.user("Who created you?")
+            s += sgl.assistant(sgl.gen("answer_2"))
+
+        backend = BaseBackend()
+        backend.chat_template = get_chat_template("llama-2-chat")
+
+        compiled = multi_turn_chat.compile(backend=backend)
+        # compiled.print_graph()
+
+    def test_fork(self):
+        @sgl.function
+        def tip_suggestion(s):
+            s += (
+                "Here are three tips for staying healthy: "
+                "1. Balanced Diet; "
+                "2. Regular Exercise; "
+                "3. Adequate Sleep\n"
+            )
+
+            forks = s.fork(3)
+            for i in range(3):
+                forks[i] += f"Now, expand tip {i+1} into a paragraph:\n"
+                forks[i] += sgl.gen("detailed_tip")
+
+            s += "Tip 1:" + forks[0]["detailed_tip"] + "\n"
+            s += "Tip 2:" + forks[1]["detailed_tip"] + "\n"
+            s += "Tip 3:" + forks[2]["detailed_tip"] + "\n"
+            s += "In summary" + sgl.gen("summary")
+
+        tracer = tip_suggestion.trace()
+        # print(tracer.last_node.print_graph_dfs())
+
+        a = tip_suggestion.run(backend=sgl.OpenAI("gpt-3.5-turbo-instruct"))
+        # print(a.text())
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/test/lang/test_vertexai_backend.py
+++ b/test/lang/test_vertexai_backend.py
@@ -0,0 +1,52 @@
+import unittest
+
+from sglang import VertexAI, set_default_backend
+from sglang.test.test_programs import (
+    test_expert_answer,
+    test_few_shot_qa,
+    test_image_qa,
+    test_mt_bench,
+    test_parallel_decoding,
+    test_parallel_encoding,
+    test_stream,
+)
+from sglang.test.test_utils import CustomTestCase
+
+
+class TestVertexAIBackend(CustomTestCase):
+    # Shared backend, created once in setUpClass.
+    backend = None
+
+    @classmethod
+    def setUpClass(cls):
+        cls.backend = VertexAI("gemini-1.5-pro-001")
+
+    def setUp(self):
+        # Every test runs against the shared backend, so select it before each one.
+        super().setUp()
+        set_default_backend(self.backend)
+
+    def test_few_shot_qa(self):
+        test_few_shot_qa()
+
+    def test_mt_bench(self):
+        test_mt_bench()
+
+    def test_expert_answer(self):
+        test_expert_answer(check_answer=False)
+
+    def test_parallel_decoding(self):
+        test_parallel_decoding()
+
+    def test_parallel_encoding(self):
+        test_parallel_encoding()
+
+    def test_image_qa(self):
+        test_image_qa()
+
+    def test_stream(self):
+        test_stream()
+
+
+if __name__ == "__main__":
+    unittest.main()