adapt to sglang v0.5.2rc1 on dcu

This commit is contained in:
maxiao
2025-09-04 15:56:33 +08:00
commit 909abb58f5
2320 changed files with 489411 additions and 0 deletions

BIN
test/lang/example_image.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

38
test/lang/run_suite.py Normal file
View File

@@ -0,0 +1,38 @@
import argparse
import glob
from sglang.test.test_utils import TestFile, run_unittest_files
# Named test suites. Every entry is a TestFile so the runner receives one
# consistent element type regardless of which suite is selected.
suites = {
    "per-commit": [
        TestFile("test_srt_backend.py"),
        # Skip this due to some OPENAI_API_KEY issues
        # "test_openai_backend.py",
    ],
}

if __name__ == "__main__":
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        "--timeout-per-file",
        type=int,
        default=1000,
        help="The time limit for running one file in seconds.",
    )
    arg_parser.add_argument(
        "--suite",
        type=str,
        # The first declared suite is the default.
        default=next(iter(suites)),
        choices=[*suites, "all"],
        help="The suite to run",
    )
    args = arg_parser.parse_args()

    if args.suite == "all":
        # Wrap discovered paths in TestFile, matching the named suites above
        # (the runner presumably expects TestFile entries -- confirm against
        # run_unittest_files). Sorting makes the run order reproducible,
        # since glob returns paths in arbitrary order.
        files = [
            TestFile(path)
            for path in sorted(glob.glob("**/test_*.py", recursive=True))
        ]
    else:
        files = suites[args.suite]

    exit_code = run_unittest_files(files, args.timeout_per_file)
    # SystemExit does not depend on the `site`-provided `exit` helper.
    raise SystemExit(exit_code)

View File

@@ -0,0 +1,25 @@
import json
import unittest
from sglang import Anthropic, set_default_backend
from sglang.test.test_programs import test_mt_bench, test_stream
from sglang.test.test_utils import CustomTestCase
class TestAnthropicBackend(CustomTestCase):
    """Runs shared sglang test programs with an Anthropic backend as default."""

    # Filled in once per class by setUpClass.
    backend = None

    @classmethod
    def setUpClass(cls):
        # Build the backend once and register it as the default for all tests.
        anthropic_backend = Anthropic("claude-3-haiku-20240307")
        cls.backend = anthropic_backend
        set_default_backend(anthropic_backend)

    def test_mt_bench(self):
        # Delegates to the shared program imported from sglang.test.test_programs.
        test_mt_bench()

    def test_stream(self):
        # Delegates to the shared program imported from sglang.test.test_programs.
        test_stream()


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,51 @@
import unittest
import sglang as sgl
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, CustomTestCase
class TestBind(CustomTestCase):
    """Exercises bind() on an sgl.function using a local sgl.Runtime backend."""

    # Runtime shared by all tests: created in setUpClass, shut down in tearDownClass.
    backend = None

    @classmethod
    def setUpClass(cls):
        runtime = sgl.Runtime(model_path=DEFAULT_MODEL_NAME_FOR_TEST)
        cls.backend = runtime
        sgl.set_default_backend(runtime)

    @classmethod
    def tearDownClass(cls):
        cls.backend.shutdown()

    def test_bind(self):
        # Bind only `prompt`, trace the partially-bound function, print its graph.
        @sgl.function
        def few_shot_qa(s, prompt, question):
            s += prompt
            s += "Q: What is the capital of France?\n"
            s += "A: Paris\n"
            s += "Q: " + question + "\n"
            s += "A:" + sgl.gen("answer", stop="\n")

        bound_qa = few_shot_qa.bind(
            prompt="The following are questions with answers.\n\n"
        )
        trace_result = bound_qa.trace()
        graph_text = trace_result.last_node.print_graph_dfs()
        print(graph_text + "\n")

    def test_cache(self):
        # Bind only `prompt`, then call cache() on the bound function with the
        # class-level backend.
        @sgl.function
        def few_shot_qa(s, prompt, question):
            s += prompt
            s += "Q: What is the capital of France?\n"
            s += "A: Paris\n"
            s += "Q: " + question + "\n"
            s += "A:" + sgl.gen("answer", stop="\n")

        bound_qa = few_shot_qa.bind(
            prompt="Answer the following questions as if you were a 5-year-old kid.\n\n"
        )
        bound_qa.cache(self.backend)


if __name__ == "__main__":
    unittest.main()

91
test/lang/test_choices.py Normal file
View File

@@ -0,0 +1,91 @@
import unittest
import numpy as np
from sglang.lang.choices import (
greedy_token_selection,
token_length_normalized,
unconditional_likelihood_normalized,
)
from sglang.test.test_utils import CustomTestCase
# Shared fixture that the tests unpack as keyword arguments into each
# choices-selection function. Entry i of every per-choice list describes
# choices[i]; each token row has the form [logprob, token_id, None].
MOCK_CHOICES_INPUT_DATA = {
    "choices": [
        "organ",  # ["organ"]
        "organism",  # ["organ", "ism"]
        "antidisestablishmentarianism",  # ["ant", "id", "is", "est", "ablish", "ment", "arian", "ism"]
    ],
    "normalized_prompt_logprobs": [-0.1, -0.2, -0.05],
    "input_token_logprobs": [
        [[-0.1, 1, None]],
        [[-0.1, 1, None], [-0.3, 2, None]],
        [
            [-0.4, 3, None],
            [-0.25, 4, None],
            [-0.1, 5, None],
            # The five trailing tokens all share the same logprob.
            *([-0.01, token_id, None] for token_id in (6, 7, 8, 9, 2)),
        ],
    ],
    # One identical single-token output row per choice (distinct list objects).
    "output_token_logprobs": [[[-0.1, 10, None]] for _ in range(3)],
    "unconditional_token_logprobs": [
        [[None, 1, None]],
        [[None, 1, None], [-1.4, 2, None]],
        [
            [None, 3, None],
            [-0.25, 4, None],
            [-0.1, 5, None],
            *([-0.01, token_id, None] for token_id in (6, 7, 8, 9, 2)),
        ],
    ],
}
class TestChoices(CustomTestCase):
    """Checks each choices-selection strategy against MOCK_CHOICES_INPUT_DATA.

    Uses unittest assertion methods rather than bare ``assert`` statements so
    the checks still run when Python is started with ``-O``.
    """

    def test_token_length_normalized(self):
        """Confirm 'antidisestablishmentarianism' is selected due to high confidences for
        its later tokens resulting in highest token length normalized prompt logprob."""
        decision = token_length_normalized(**MOCK_CHOICES_INPUT_DATA)
        self.assertEqual(decision.decision, "antidisestablishmentarianism")

    def test_greedy_token_selection(self):
        """Confirm 'organ' is selected due to it having the joint highest initial token
        logprob, and a higher average logprob than organism's second token."""
        decision = greedy_token_selection(**MOCK_CHOICES_INPUT_DATA)
        self.assertEqual(decision.decision, "organ")
        # One row per choice, eight columns each; compared with a tolerance
        # because the values are floats.
        expected_matrix = [
            [-0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1, -0.1],
            [-0.1, -0.3, -0.2, -0.2, -0.2, -0.2, -0.2, -0.2],
            [-0.4, -0.25, -0.1, -0.01, -0.01, -0.01, -0.01, -0.01],
        ]
        self.assertTrue(
            np.allclose(
                decision.meta_info["greedy_logprob_matrix"],
                expected_matrix,
                atol=0.01,
            )
        )

    def test_unconditional_likelihood_normalized(self):
        """Confirm 'organism' is selected due to it having the highest average token logprob
        once normalized by the unconditional token logprobs."""
        decision = unconditional_likelihood_normalized(**MOCK_CHOICES_INPUT_DATA)
        self.assertEqual(decision.decision, "organism")
        self.assertTrue(
            np.allclose(
                decision.meta_info["normalized_unconditional_prompt_logprobs"],
                [-0.1, 0.5, -0.05],
                atol=0.01,
            )
        )


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,25 @@
import json
import unittest
from sglang import LiteLLM, set_default_backend
from sglang.test.test_programs import test_mt_bench, test_stream
from sglang.test.test_utils import CustomTestCase
# NOTE(review): this class drives a LiteLLM backend; the "Anthropic" in its name
# looks like a copy-paste leftover. The name is kept because it is the
# externally addressable test id -- consider renaming in a coordinated change.
class TestAnthropicBackend(CustomTestCase):
    """Runs shared sglang test programs with a LiteLLM backend as default."""

    # Filled in once per class by setUpClass.
    chat_backend = None

    @classmethod
    def setUpClass(cls):
        # Build the backend once and register it as the default for all tests.
        litellm_backend = LiteLLM("gpt-3.5-turbo")
        cls.chat_backend = litellm_backend
        set_default_backend(litellm_backend)

    def test_mt_bench(self):
        # Delegates to the shared program imported from sglang.test.test_programs.
        test_mt_bench()

    def test_stream(self):
        # Delegates to the shared program imported from sglang.test.test_programs.
        test_stream()


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,92 @@
import unittest
from sglang import OpenAI, set_default_backend
from sglang.test.test_programs import (
test_chat_completion_speculative,
test_completion_speculative,
test_decode_int,
test_decode_json,
test_expert_answer,
test_few_shot_qa,
test_image_qa,
test_mt_bench,
test_parallel_decoding,
test_parallel_encoding,
test_react,
test_select,
test_stream,
test_tool_use,
)
from sglang.test.test_utils import CustomTestCase
class TestOpenAIBackend(CustomTestCase):
    """Runs the shared sglang test programs against three OpenAI backends.

    Each test first selects the backend it needs as the default and then calls
    the matching program imported from sglang.test.test_programs.
    """

    # All three are created once per class by setUpClass.
    instruct_backend = None
    chat_backend = None
    chat_vision_backend = None

    @classmethod
    def setUpClass(cls):
        cls.instruct_backend = OpenAI("gpt-3.5-turbo-instruct")
        cls.chat_backend = OpenAI("gpt-3.5-turbo")
        cls.chat_vision_backend = OpenAI("gpt-4-turbo")

    def _run_on(self, backend, program, **program_kwargs):
        # Make `backend` the default, then run `program` with the given kwargs.
        set_default_backend(backend)
        program(**program_kwargs)

    def test_few_shot_qa(self):
        self._run_on(self.instruct_backend, test_few_shot_qa)

    def test_mt_bench(self):
        self._run_on(self.chat_backend, test_mt_bench)

    def test_select(self):
        self._run_on(self.instruct_backend, test_select, check_answer=True)

    def test_decode_int(self):
        self._run_on(self.instruct_backend, test_decode_int)

    def test_decode_json(self):
        self._run_on(self.instruct_backend, test_decode_json)

    def test_expert_answer(self):
        self._run_on(self.instruct_backend, test_expert_answer)

    def test_tool_use(self):
        self._run_on(self.instruct_backend, test_tool_use)

    def test_react(self):
        self._run_on(self.instruct_backend, test_react)

    def test_parallel_decoding(self):
        self._run_on(self.instruct_backend, test_parallel_decoding)

    def test_parallel_encoding(self):
        self._run_on(self.instruct_backend, test_parallel_encoding)

    def test_image_qa(self):
        self._run_on(self.chat_vision_backend, test_image_qa)

    def test_stream(self):
        self._run_on(self.instruct_backend, test_stream)

    def test_completion_speculative(self):
        self._run_on(self.instruct_backend, test_completion_speculative)

    def test_chat_completion_speculative(self):
        self._run_on(self.chat_backend, test_chat_completion_speculative)


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,68 @@
"""
Tests for the separate_reasoning functionality in sglang.
Usage:
python3 -m unittest test/lang/test_separate_reasoning.py
"""
import unittest
from sglang import assistant, gen, separate_reasoning, user
from sglang.lang.ir import SglExprList, SglSeparateReasoning
from sglang.test.test_utils import CustomTestCase
class TestSeparateReasoning(CustomTestCase):
    """Construction-time checks for separate_reasoning(); nothing is executed."""

    def test_separate_reasoning_creation(self):
        """Test that SglSeparateReasoning objects are created correctly."""
        # A valid model type wrapped around a gen expression yields a
        # two-element list: the gen itself, then the reasoning expression.
        gen_expr = gen("test")
        wrapped = separate_reasoning(gen_expr, model_type="deepseek-r1")
        self.assertIsInstance(wrapped, SglExprList)
        self.assertEqual(len(wrapped.expr_list), 2)
        self.assertEqual(wrapped.expr_list[0], gen_expr)

        reasoning = wrapped.expr_list[1]
        self.assertIsInstance(reasoning, SglSeparateReasoning)
        self.assertEqual(reasoning.model_type, "deepseek-r1")
        self.assertEqual(reasoning.name, "test_reasoning_content")

        # A second valid model type.
        wrapped = separate_reasoning(gen_expr, model_type="qwen3")
        self.assertIsInstance(wrapped, SglExprList)
        self.assertEqual(wrapped.expr_list[1].model_type, "qwen3")

    def test_separate_reasoning_name_processing(self):
        """Test that separate_reasoning correctly processes names."""
        gen_expr = gen("test_var")
        wrapped = separate_reasoning(gen_expr, model_type="deepseek-r1")
        reasoning = wrapped.expr_list[1]
        self.assertEqual(reasoning.name, "test_var_reasoning_content")

        # process_name_for_reasoning is checked directly with another name.
        derived_name = reasoning.process_name_for_reasoning("another_var")
        self.assertEqual(derived_name, "another_var_reasoning_content")

    def test_separate_reasoning_repr(self):
        """Test the string representation of SglSeparateReasoning."""
        gen_expr = gen("test_var")
        wrapped = separate_reasoning(gen_expr, model_type="deepseek-r1")
        reasoning = wrapped.expr_list[1]
        self.assertEqual(
            repr(reasoning),
            "SeparateReasoning(model_type=deepseek-r1, name=test_var_reasoning_content)",
        )

    def test_separate_reasoning_with_invalid_model_type(self):
        """Test that separate_reasoning accepts any model type during creation."""
        # Construction with an unknown model type is expected to succeed here;
        # per the original author's note, the actual validation happens in the
        # ReasoningParser constructor.
        gen_expr = gen("test")
        wrapped = separate_reasoning(gen_expr, model_type="invalid-model")
        self.assertIsInstance(wrapped, SglExprList)
        self.assertEqual(wrapped.expr_list[1].model_type, "invalid-model")


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,195 @@
"""
Tests for the execution of separate_reasoning functionality in sglang.
Usage:
python3 -m unittest test/lang/test_separate_reasoning_execution.py
"""
import threading
import time
import unittest
from unittest.mock import MagicMock, patch
from sglang import assistant, gen, separate_reasoning, user
from sglang.lang.interpreter import StreamExecutor
from sglang.lang.ir import SglGen, SglSeparateReasoning
from sglang.test.test_utils import CustomTestCase
# Factory for the events the tests attach to an executor; it returns a plain,
# initially-unset threading.Event.
def create_daemon_event():
    """Return a new, unset ``threading.Event``."""
    return threading.Event()
class MockReasoningParser:
    """Stand-in parser that records which parse method ran.

    Both parse methods return a ``(reasoning, normal_text)`` pair in which the
    input text is prefixed with a label naming ``model_type``.
    """

    def __init__(self, model_type):
        self.model_type = model_type
        # Flipped to True the first time the matching parse method is called.
        self.parse_non_stream_called = False
        self.parse_stream_chunk_called = False

    def _tagged_pair(self, text):
        # Simulate parsing by prefixing the text for each output stream.
        model = self.model_type
        return (
            f"[REASONING from {model}]: {text}",
            f"[NORMAL from {model}]: {text}",
        )

    def parse_non_stream(self, full_text):
        """Mark the non-stream path as used and return the tagged pair."""
        self.parse_non_stream_called = True
        return self._tagged_pair(full_text)

    def parse_stream_chunk(self, chunk_text):
        """Mark the streaming path as used and return the tagged pair."""
        self.parse_stream_chunk_called = True
        return self._tagged_pair(chunk_text)
class TestSeparateReasoningExecution(CustomTestCase):
    """Tests StreamExecutor's handling of SglSeparateReasoning with a mocked parser.

    Every test patches ``sglang.srt.parser.reasoning_parser.ReasoningParser``
    so no real parser is constructed.
    """

    def setUp(self):
        """Set up for the test."""
        super().setUp()
        # Store any events created during the test
        self.events = []

    def tearDown(self):
        """Set every tracked event so any thread waiting on one is released."""
        # The original file defined tearDown twice with the same body; the
        # second definition silently shadowed the first, so one is kept.
        super().tearDown()
        for event in self.events:
            event.set()

    @patch("sglang.srt.parser.reasoning_parser.ReasoningParser")
    def test_execute_separate_reasoning(self, mock_parser_class):
        """Test that _execute_separate_reasoning correctly calls the ReasoningParser."""
        # Setup mock parser
        mock_parser = MockReasoningParser("deepseek-r1")
        mock_parser_class.return_value = mock_parser

        # Create a mock backend to avoid AttributeError in __del__
        mock_backend = MagicMock()

        # Create a StreamExecutor with necessary setup
        executor = StreamExecutor(
            backend=mock_backend,
            arguments={},
            default_sampling_para={},
            chat_template={
                "role_map": {"user": "user", "assistant": "assistant"}
            },  # Simple chat template
            stream=False,
            use_thread=False,
        )

        # Set up the executor with a variable and its value
        var_name = "test_var"
        reasoning_name = f"{var_name}_reasoning_content"
        var_value = "Test content"
        executor.variables = {var_name: var_value}

        # Create events and track them for cleanup
        var_event = create_daemon_event()
        reasoning_event = create_daemon_event()
        self.events.extend([var_event, reasoning_event])
        executor.variable_event = {var_name: var_event, reasoning_name: reasoning_event}
        executor.variable_event[var_name].set()  # Mark as ready

        # Set up the current role
        executor.cur_role = "assistant"
        executor.cur_role_begin_pos = 0
        executor.text_ = var_value

        # Create a gen expression and a separate_reasoning expression
        gen_expr = SglGen(var_name)
        expr = SglSeparateReasoning("deepseek-r1", expr=gen_expr)

        # Execute separate_reasoning
        executor._execute_separate_reasoning(expr)

        # Verify that the parser was created with the correct model type
        mock_parser_class.assert_called_once_with("deepseek-r1")

        # Verify that parse_non_stream was called
        self.assertTrue(mock_parser.parse_non_stream_called)

        # Verify that the variables were updated correctly
        self.assertIn(reasoning_name, executor.variables)
        self.assertEqual(
            executor.variables[reasoning_name],
            f"[REASONING from deepseek-r1]: {var_value}",
        )
        self.assertEqual(
            executor.variables[var_name], f"[NORMAL from deepseek-r1]: {var_value}"
        )

        # Verify that the variable event was set
        self.assertIn(reasoning_name, executor.variable_event)
        self.assertTrue(executor.variable_event[reasoning_name].is_set())

        # Verify that the text was updated
        self.assertEqual(executor.text_, f"[NORMAL from deepseek-r1]: {var_value}")

    @patch("sglang.srt.parser.reasoning_parser.ReasoningParser")
    def test_reasoning_parser_integration(self, mock_parser_class):
        """Test the integration between separate_reasoning and ReasoningParser."""
        # Setup mock parsers for different model types
        deepseek_parser = MockReasoningParser("deepseek-r1")
        qwen_parser = MockReasoningParser("qwen3")

        # Configure the mock to return different parsers based on model type
        def get_parser(model_type):
            if model_type == "deepseek-r1":
                return deepseek_parser
            elif model_type == "qwen3":
                return qwen_parser
            else:
                raise ValueError(f"Unsupported model type: {model_type}")

        mock_parser_class.side_effect = get_parser

        # Obtain each parser through the patched class so that the side_effect
        # routing configured above is actually exercised.
        test_text = "This is a test"

        # Test with DeepSeek-R1 model
        parser = mock_parser_class("deepseek-r1")
        self.assertIs(parser, deepseek_parser)
        reasoning, normal_text = parser.parse_non_stream(test_text)
        self.assertEqual(reasoning, f"[REASONING from deepseek-r1]: {test_text}")
        self.assertEqual(normal_text, f"[NORMAL from deepseek-r1]: {test_text}")

        # Test with Qwen3 model
        parser = mock_parser_class("qwen3")
        self.assertIs(parser, qwen_parser)
        reasoning, normal_text = parser.parse_non_stream(test_text)
        self.assertEqual(reasoning, f"[REASONING from qwen3]: {test_text}")
        self.assertEqual(normal_text, f"[NORMAL from qwen3]: {test_text}")

    @patch("sglang.srt.parser.reasoning_parser.ReasoningParser")
    def test_reasoning_parser_invalid_model(self, mock_parser_class):
        """Test that ReasoningParser raises an error for invalid model types."""
        # NOTE: the errors checked here are raised by the side_effect function
        # defined below, not by the real ReasoningParser.

        # Configure the mock to raise an error for invalid model types
        def get_parser(model_type):
            if model_type in ["deepseek-r1", "qwen3"]:
                return MockReasoningParser(model_type)
            elif model_type is None:
                raise ValueError("Model type must be specified")
            else:
                raise ValueError(f"Unsupported model type: {model_type}")

        mock_parser_class.side_effect = get_parser

        with self.assertRaises(ValueError) as context:
            mock_parser_class("invalid-model")
        self.assertIn("Unsupported model type", str(context.exception))

        with self.assertRaises(ValueError) as context:
            mock_parser_class(None)
        self.assertIn("Model type must be specified", str(context.exception))


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,86 @@
"""
Usage:
python3 -m unittest test_srt_backend.TestSRTBackend.test_gen_min_new_tokens
python3 -m unittest test_srt_backend.TestSRTBackend.test_hellaswag_select
"""
import unittest
import sglang as sgl
from sglang.test.test_programs import (
test_decode_int,
test_decode_json_regex,
test_dtype_gen,
test_expert_answer,
test_few_shot_qa,
test_gen_min_new_tokens,
test_hellaswag_select,
test_mt_bench,
test_parallel_decoding,
test_regex,
test_select,
test_stream,
test_tool_use,
)
from sglang.test.test_utils import DEFAULT_MODEL_NAME_FOR_TEST, CustomTestCase
class TestSRTBackend(CustomTestCase):
    """Runs the shared sglang test programs against a local sgl.Runtime backend."""

    # Runtime shared by all tests: created in setUpClass, shut down in tearDownClass.
    backend = None

    @classmethod
    def setUpClass(cls):
        runtime = sgl.Runtime(
            model_path=DEFAULT_MODEL_NAME_FOR_TEST,
            cuda_graph_max_bs=4,
        )
        cls.backend = runtime
        sgl.set_default_backend(runtime)

    @classmethod
    def tearDownClass(cls):
        cls.backend.shutdown()

    # Each test below delegates to the same-named program imported from
    # sglang.test.test_programs.

    def test_few_shot_qa(self):
        test_few_shot_qa()

    def test_mt_bench(self):
        test_mt_bench()

    def test_select(self):
        test_select(check_answer=False)

    def test_decode_int(self):
        test_decode_int()

    def test_decode_json_regex(self):
        test_decode_json_regex()

    def test_expert_answer(self):
        test_expert_answer()

    def test_tool_use(self):
        test_tool_use()

    def test_parallel_decoding(self):
        test_parallel_decoding()

    def test_stream(self):
        test_stream()

    def test_regex(self):
        test_regex()

    def test_dtype_gen(self):
        test_dtype_gen()

    def test_hellaswag_select(self):
        # Two passes, to capture more bugs; the accuracy of each pass is checked.
        # NOTE(review): the placement of the assertion inside the loop is
        # reconstructed from whitespace-stripped source -- confirm.
        passes = 2
        for _ in range(passes):
            accuracy, _latency = test_hellaswag_select()
            self.assertGreater(accuracy, 0.60)

    def test_gen_min_new_tokens(self):
        test_gen_min_new_tokens()


if __name__ == "__main__":
    unittest.main()

129
test/lang/test_tracing.py Normal file
View File

@@ -0,0 +1,129 @@
import unittest
import sglang as sgl
from sglang.lang.backend.base_backend import BaseBackend
from sglang.lang.chat_template import get_chat_template
from sglang.test.test_utils import CustomTestCase
class TestTracing(CustomTestCase):
    """Tracing and compilation tests for functions decorated with sgl.function.

    test_multi_function and test_fork also execute against an
    ``sgl.OpenAI`` backend (presumably requiring API credentials -- confirm).
    """

    def test_few_shot_qa(self):
        """Trace a few-shot QA function without binding its argument."""

        @sgl.function
        def few_shot_qa(s, question):
            s += "The following are questions with answers.\n\n"
            s += "Q: What is the capital of France?\n"
            s += "A: Paris\n"
            s += "Q: " + question + "\n"
            s += "A:" + sgl.gen("answer", stop="\n")

        tracer = few_shot_qa.trace()
        # print(tracer.last_node.print_graph_dfs() + "\n")

    def test_select(self):
        """Trace a function that mixes select() and gen()."""

        @sgl.function
        def capital(s):
            s += "The capital of France is"
            s += sgl.select("capital", ["Paris. ", "London. "])
            s += "It is a city" + sgl.gen("description", stop=".")

        tracer = capital.trace()
        # print(tracer.last_node.print_graph_dfs() + "\n")

    def test_raise_warning(self):
        """Tracing a function that f-string-formats its argument must raise TypeError."""

        @sgl.function
        def wrong(s, question):
            s += f"I want to ask {question}"

        # assertRaises replaces the manual try/except/flag and the bare
        # `assert`, which would be stripped under `python -O`.
        with self.assertRaises(TypeError):
            wrong.trace()

    def test_multi_function(self):
        """Compile a function that calls another sgl.function, then run it singly and batched."""

        @sgl.function
        def expand(s, tip):
            s += (
                "Please expand the following tip into a detailed paragraph:"
                + tip
                + "\n"
            )
            s += sgl.gen("detailed_tip")

        @sgl.function
        def tip_suggestion(s, topic):
            s += "Here are 2 tips for " + topic + ".\n"

            s += "1." + sgl.gen("tip_1", stop=["\n", ":", "."]) + "\n"
            s += "2." + sgl.gen("tip_2", stop=["\n", ":", "."]) + "\n"

            branch1 = expand(tip=s["tip_1"])
            branch2 = expand(tip=s["tip_2"])

            s += "Tip 1: " + branch1["detailed_tip"] + "\n"
            s += "Tip 2: " + branch2["detailed_tip"] + "\n"
            s += "In summary" + sgl.gen("summary")

        compiled = tip_suggestion.compile()
        # compiled.print_graph()

        sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
        state = compiled.run(topic="staying healthy")
        # print(state.text() + "\n")

        states = compiled.run_batch(
            [
                {"topic": "staying healthy"},
                {"topic": "staying happy"},
                {"topic": "earning money"},
            ],
            temperature=0,
        )
        # for s in states:
        #     print(s.text() + "\n")

    def test_role(self):
        """Compile a multi-turn chat function against a BaseBackend with a chat template."""

        @sgl.function
        def multi_turn_chat(s):
            s += sgl.user("Who are you?")
            s += sgl.assistant(sgl.gen("answer_1"))
            s += sgl.user("Who created you?")
            s += sgl.assistant(sgl.gen("answer_2"))

        backend = BaseBackend()
        backend.chat_template = get_chat_template("llama-2-chat")

        compiled = multi_turn_chat.compile(backend=backend)
        # compiled.print_graph()

    def test_fork(self):
        """Trace and run a function that forks the state into three branches."""

        @sgl.function
        def tip_suggestion(s):
            s += (
                "Here are three tips for staying healthy: "
                "1. Balanced Diet; "
                "2. Regular Exercise; "
                "3. Adequate Sleep\n"
            )

            forks = s.fork(3)
            for i in range(3):
                forks[i] += f"Now, expand tip {i+1} into a paragraph:\n"
                # Plain string: the original f-string had no placeholders.
                forks[i] += sgl.gen("detailed_tip")

            s += "Tip 1:" + forks[0]["detailed_tip"] + "\n"
            s += "Tip 2:" + forks[1]["detailed_tip"] + "\n"
            s += "Tip 3:" + forks[2]["detailed_tip"] + "\n"
            s += "In summary" + sgl.gen("summary")

        tracer = tip_suggestion.trace()
        # print(tracer.last_node.print_graph_dfs())

        a = tip_suggestion.run(backend=sgl.OpenAI("gpt-3.5-turbo-instruct"))
        # print(a.text())


if __name__ == "__main__":
    unittest.main()

View File

@@ -0,0 +1,53 @@
import unittest
from sglang import VertexAI, set_default_backend
from sglang.test.test_programs import (
test_expert_answer,
test_few_shot_qa,
test_image_qa,
test_mt_bench,
test_parallel_decoding,
test_parallel_encoding,
test_stream,
)
from sglang.test.test_utils import CustomTestCase
class TestVertexAIBackend(CustomTestCase):
    """Runs shared sglang test programs against a VertexAI backend.

    The backend is created once per class; each test registers it as the
    default before calling its program.
    """

    # Filled in once per class by setUpClass.
    backend = None

    @classmethod
    def setUpClass(cls):
        cls.backend = VertexAI("gemini-1.5-pro-001")

    def _run(self, program, **program_kwargs):
        # Register the class backend as default, then run `program`.
        set_default_backend(self.backend)
        program(**program_kwargs)

    def test_few_shot_qa(self):
        self._run(test_few_shot_qa)

    def test_mt_bench(self):
        self._run(test_mt_bench)

    def test_expert_answer(self):
        self._run(test_expert_answer, check_answer=False)

    def test_parallel_decoding(self):
        self._run(test_parallel_decoding)

    def test_parallel_encoding(self):
        self._run(test_parallel_encoding)

    def test_image_qa(self):
        self._run(test_image_qa)

    def test_stream(self):
        self._run(test_stream)


if __name__ == "__main__":
    unittest.main()