Sync from v0.13
This commit is contained in:
0
tests/v1/structured_output/__init__.py
Normal file
0
tests/v1/structured_output/__init__.py
Normal file
192
tests/v1/structured_output/test_backend_guidance.py
Normal file
192
tests/v1/structured_output/test_backend_guidance.py
Normal file
@@ -0,0 +1,192 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import time
|
||||
from concurrent.futures import Future
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from vllm.config import StructuredOutputsConfig, VllmConfig
|
||||
from vllm.config.model import ModelConfig
|
||||
from vllm.config.parallel import ParallelConfig
|
||||
from vllm.config.speculative import SpeculativeConfig
|
||||
from vllm.sampling_params import SamplingParams, StructuredOutputsParams
|
||||
from vllm.v1.request import Request
|
||||
from vllm.v1.structured_output import StructuredOutputManager
|
||||
from vllm.v1.structured_output.backend_guidance import GuidanceBackend
|
||||
from vllm.v1.structured_output.backend_types import StructuredOutputOptions
|
||||
|
||||
TOKENIZER = "gpt2"
|
||||
|
||||
|
||||
def test_backend_guidance_rollback_terminated():
    """Verify that GuidanceBackend can roll back out of a terminated state.

    With speculative decoding the draft model may propose EOS, and the
    guidance backend may verify it, leaving the grammar in a stopped
    state.  If the target model then rejects EOS, the grammar must be
    rolled back to a live state, so rollback from termination has to work.
    """
    # NOTE: sibling tests in this file pass this config via the
    # ``structured_outputs_config`` field; use the same field here for
    # consistency (``decoding_config`` is the legacy name).
    vllm_config = VllmConfig(
        structured_outputs_config=StructuredOutputsConfig(
            backend="guidance",
        )
    )
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)

    backend = GuidanceBackend(
        vllm_config,
        tokenizer=tokenizer,
        vocab_size=50257,
    )

    grammar = backend.compile_grammar(
        StructuredOutputOptions.JSON, '{"type": "object"}'
    )

    prompt = tokenizer.encode('{"a": "b"}')
    assert len(prompt) > 1
    dummy_wrong = tokenizer.encode('{"a"}')

    # Feed a complete JSON object token by token; the grammar only
    # terminates once EOS itself is accepted.
    for token in prompt:
        assert grammar.accept_tokens("", [token])
    assert not grammar.is_terminated()
    assert grammar.accept_tokens("", [tokenizer.eos_token_id])
    assert grammar.is_terminated()
    # Once terminated, giving any other token should also be accepted.
    assert grammar.accept_tokens("", dummy_wrong)

    # Rollback is done from where the state was terminated, so from '}'
    # (the last real token), not from EOS.
    grammar.rollback(len(prompt) - 1)
    assert not grammar.is_terminated()
    assert grammar.validate_tokens([tokenizer.eos_token_id]) == []
    assert grammar.validate_tokens(dummy_wrong) != dummy_wrong
    assert grammar.accept_tokens("", prompt[1:])
    assert not grammar.is_terminated()
    assert grammar.accept_tokens("", [tokenizer.eos_token_id])
    assert grammar.is_terminated()

    # Rollback of <= 0 should not change the terminated state.
    grammar.rollback(0)
    assert grammar.is_terminated()
    grammar.rollback(-1)
    assert grammar.is_terminated()
|
||||
|
||||
|
||||
def test_grammar_bitmask_with_specdec():
    """Exercise ``grammar_bitmask`` with speculative-decoding token lists.

    After computing the bitmask the manager must roll the grammar back,
    so the request's grammar is never left terminated regardless of
    where (or whether) EOS appears among the speculated tokens.
    """
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)
    prompt = tokenizer.encode('{"a": "b"}')
    vllm_config = VllmConfig(
        model_config=ModelConfig(tokenizer=TOKENIZER),
        structured_outputs_config=StructuredOutputsConfig(backend="guidance"),
        speculative_config=SpeculativeConfig(model="[ngram]", num_speculative_tokens=3),
    )
    structured_output_manager = StructuredOutputManager(vllm_config)

    for i in range(1, 2):
        sampling_params = SamplingParams(
            structured_outputs=StructuredOutputsParams(
                json='{"type": "object"}',
            ),
        )
        sampling_params.structured_outputs._backend = "guidance"

        my_req_id = f"my_req_id_{i}"
        request = Request(
            my_req_id,
            prompt_token_ids=prompt[:i],
            sampling_params=sampling_params,
            pooling_params=None,
            eos_token_id=tokenizer.eos_token_id,
        )

        structured_output_manager.grammar_init(request)

        def grammar_bitmask(req: Request, tokens: list[int]) -> None:
            # Compute the bitmask treating ``tokens`` as speculated tokens.
            structured_output_manager.grammar_bitmask(
                requests={req.request_id: req},
                structured_output_request_ids={req.request_id: 0},
                scheduled_spec_decode_tokens={req.request_id: tokens},
            )
            # At this point, we rolled-back, so should not be terminated
            assert not req.structured_output_request.grammar.is_terminated()

        # The grammar might not yet be compiled, so we wait for it.
        # Bound the wait so a compilation failure cannot hang the test
        # forever (mirrors the timeout in test_grammar_init_async_and_sync),
        # and sleep between polls instead of busy-spinning.
        start_time = time.time()
        while not request.structured_output_request._check_grammar_completion():
            if time.time() - start_time > 30:
                pytest.fail("Grammar compilation timed out")
            time.sleep(0.01)

        assert request.structured_output_request.grammar.accept_tokens(
            request.request_id, prompt[:i]
        )

        grammar_bitmask(request, prompt[i:] + [tokenizer.eos_token_id])
        grammar_bitmask(
            request, prompt[i:] + [tokenizer.eos_token_id] + prompt
        )  # EOS not the final token
        grammar_bitmask(request, prompt[i:])  # EOS not present
        grammar_bitmask(request, prompt[i:] + [tokenizer.eos_token_id])
|
||||
|
||||
|
||||
@pytest.mark.parametrize("async_grammar", [True, False])
def test_grammar_init_async_and_sync(async_grammar):
    """Grammar initialization must work in both async and sync modes.

    The ``distributed_executor_backend`` setting decides whether grammar
    compilation is submitted to an executor (asynchronous) or performed
    inline (synchronous).  With "external_launcher" it runs inline to
    avoid deadlocks; otherwise a Future is stored until completion.
    """
    tokenizer = AutoTokenizer.from_pretrained(TOKENIZER)
    prompt = tokenizer.encode('{"a": "b"}')

    # "external_launcher" forces synchronous compilation; None keeps the
    # default asynchronous path.
    executor_backend = None if async_grammar else "external_launcher"
    vllm_config = VllmConfig(
        model_config=ModelConfig(tokenizer=TOKENIZER),
        structured_outputs_config=StructuredOutputsConfig(backend="guidance"),
        parallel_config=ParallelConfig(distributed_executor_backend=executor_backend),
    )
    structured_output_manager = StructuredOutputManager(vllm_config)

    sampling_params = SamplingParams(
        structured_outputs=StructuredOutputsParams(json='{"type": "object"}'),
    )
    sampling_params.structured_outputs._backend = "guidance"

    request = Request(
        "test_request",
        prompt_token_ids=prompt,
        sampling_params=sampling_params,
        pooling_params=None,
        eos_token_id=tokenizer.eos_token_id,
    )

    structured_output_manager.grammar_init(request)

    # Inspect the raw slot right after init: before any call to
    # _check_grammar_completion, async mode holds a Future while sync
    # mode already holds the compiled grammar itself.
    raw_grammar = request.structured_output_request._grammar
    if async_grammar:
        assert isinstance(raw_grammar, Future), (
            "Async mode should store a Future before completion"
        )
    else:
        assert not isinstance(raw_grammar, Future), (
            "Sync mode should store the grammar directly, not a Future"
        )

    # Poll until the grammar is ready (covers both modes), with a
    # 5-second deadline so a failure cannot hang the suite.
    deadline = time.time() + 5
    while not request.structured_output_request._check_grammar_completion():
        if time.time() > deadline:
            pytest.fail("Grammar compilation timed out")
        time.sleep(0.01)

    # Once complete, the Future (if any) must have been unwrapped.
    assert not isinstance(request.structured_output_request._grammar, Future)

    # The resulting grammar is usable: present, live, and accepting
    # valid tokens.
    grammar = request.structured_output_request.grammar
    assert grammar is not None
    assert not grammar.is_terminated()
    assert grammar.accept_tokens(request.request_id, prompt)
|
||||
172
tests/v1/structured_output/test_gptoss_structural_tags.py
Normal file
172
tests/v1/structured_output/test_gptoss_structural_tags.py
Normal file
@@ -0,0 +1,172 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
"""Unit tests for GPT-OSS structural tag support in reasoning (PR #25515)."""
|
||||
|
||||
import json
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.entrypoints.tool_server import ToolServer
|
||||
from vllm.reasoning.gptoss_reasoning_parser import (
|
||||
GptOssReasoningParser,
|
||||
from_builtin_tool_to_tag,
|
||||
no_func_reaonsing_tag,
|
||||
tag_with_builtin_funcs,
|
||||
)
|
||||
|
||||
|
||||
class TestGptOssReasoningParser:
    """Structural-tag behavior of ``GptOssReasoningParser``."""

    @pytest.fixture
    def mock_tokenizer(self):
        """Tokenizer stand-in whose encode() returns a fixed token list."""
        tok = Mock()
        tok.encode = Mock(return_value=[1, 2, 3, 4, 5])
        return tok

    @pytest.fixture
    def reasoning_parser(self, mock_tokenizer):
        """Parser under test, built on the mocked tokenizer."""
        return GptOssReasoningParser(mock_tokenizer)

    @pytest.fixture
    def mock_tool_server_empty(self):
        """ToolServer stand-in that exposes no tools at all."""
        server = Mock(spec=ToolServer)
        server.has_tool = Mock(return_value=False)
        return server

    @pytest.fixture
    def mock_tool_server_with_browser(self):
        """ToolServer stand-in that only exposes the browser tool."""
        server = Mock(spec=ToolServer)
        server.has_tool = Mock(side_effect=lambda tool: tool == "browser")
        return server

    @pytest.fixture
    def mock_tool_server_with_all_tools(self):
        """ToolServer stand-in exposing browser, python and container."""
        server = Mock(spec=ToolServer)
        server.has_tool = Mock(
            side_effect=lambda tool: tool in ["browser", "python", "container"]
        )
        return server

    def test_prepare_structured_tag_no_tool_server(self, reasoning_parser):
        """Without a tool server the default reasoning tag is returned."""
        result = reasoning_parser.prepare_structured_tag(None, None)
        assert result == json.dumps(no_func_reaonsing_tag)

        # Round-trip through JSON and check the structure in detail.
        payload = json.loads(result)
        fmt = payload["format"]
        assert payload["type"] == "structural_tag"
        assert fmt["type"] == "triggered_tags"
        assert len(fmt["tags"]) == 1
        assert fmt["tags"][0]["begin"] == "<|channel|>analysis<|message|>"
        assert fmt["triggers"] == ["<|channel|>analysis"]

    def test_prepare_structured_tag_with_all_tools(
        self, reasoning_parser, mock_tool_server_with_all_tools
    ):
        """All builtin tools contribute their tag pairs to the output."""
        payload = json.loads(
            reasoning_parser.prepare_structured_tag(
                None, mock_tool_server_with_all_tools
            )
        )

        # One analysis tag plus two tags per tool (3 tools -> 7 total).
        assert len(payload["format"]["tags"]) == 7

        begins = [tag["begin"] for tag in payload["format"]["tags"]]
        for tool in ("browser", "python", "container"):
            assert f"<|channel|>commentary to={tool}" in begins
            assert f"<|channel|>analysis to={tool}" in begins

    def test_prepare_structured_tag_with_original_tag(self, reasoning_parser):
        """A caller-supplied tag is passed through untouched."""
        original_tag = '{"custom": "tag"}'
        assert (
            reasoning_parser.prepare_structured_tag(original_tag, None)
            == original_tag
        )

    def test_from_builtin_tool_to_tag(self):
        """from_builtin_tool_to_tag yields a commentary and an analysis tag."""
        tags = from_builtin_tool_to_tag("python")
        assert len(tags) == 2

        for idx, channel in enumerate(("commentary", "analysis")):
            assert tags[idx]["begin"] == f"<|channel|>{channel} to=python"
            assert tags[idx]["content"]["type"] == "any_text"
            assert tags[idx]["end"] == "<|end|>"

    def test_tag_with_builtin_funcs(self):
        """tag_with_builtin_funcs extends the base tag with tool tags."""
        result = tag_with_builtin_funcs(no_func_reaonsing_tag, ["browser", "python"])

        assert result["type"] == "structural_tag"
        # Base analysis tag plus two tags for each of the two tools.
        assert len(result["format"]["tags"]) == 5

        # A commentary trigger is added alongside the analysis trigger.
        triggers = result["format"]["triggers"]
        assert "<|channel|>commentary to=" in triggers
        assert "<|channel|>analysis" in triggers

    def test_tag_structure_invariants(self):
        """The base no-function tag has the expected fixed shape."""
        fmt = no_func_reaonsing_tag["format"]
        assert no_func_reaonsing_tag["type"] == "structural_tag"
        assert fmt["type"] == "triggered_tags"
        assert fmt["stop_after_first"] is False

        # The single built-in tag is the analysis tag.
        first = fmt["tags"][0]
        assert first["begin"] == "<|channel|>analysis<|message|>"
        assert first["content"]["type"] == "any_text"
        assert first["end"] == "<|end|>"

    def test_json_serialization_valid(
        self, reasoning_parser, mock_tool_server_with_all_tools
    ):
        """Every prepare_structured_tag variant emits parseable JSON."""
        no_tools = Mock(spec=ToolServer)
        no_tools.has_tool = Mock(return_value=False)

        # None server, empty server, fully-equipped server: all must
        # produce JSON that loads without raising.
        for server in (None, no_tools, mock_tool_server_with_all_tools):
            json.loads(reasoning_parser.prepare_structured_tag(None, server))

    @pytest.mark.parametrize("tool_name", ["browser", "python", "container"])
    def test_single_tool_integration(self, reasoning_parser, tool_name):
        """Each tool alone yields exactly its own pair of extra tags."""
        server = Mock(spec=ToolServer)
        server.has_tool = Mock(side_effect=lambda tool: tool == tool_name)

        payload = json.loads(
            reasoning_parser.prepare_structured_tag(None, server)
        )

        # 1 analysis tag + 2 tags for the single enabled tool.
        assert len(payload["format"]["tags"]) == 3

        begins = [tag["begin"] for tag in payload["format"]["tags"]]
        assert f"<|channel|>commentary to={tool_name}" in begins
        assert f"<|channel|>analysis to={tool_name}" in begins
|
||||
208
tests/v1/structured_output/test_reasoning_structured_output.py
Normal file
208
tests/v1/structured_output/test_reasoning_structured_output.py
Normal file
@@ -0,0 +1,208 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
"""Unit tests for reasoning-aware structured output functionality (PR #25515)."""
|
||||
|
||||
from unittest.mock import Mock
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.config import ModelConfig, SchedulerConfig, VllmConfig
|
||||
from vllm.reasoning import ReasoningParser
|
||||
from vllm.v1.request import Request
|
||||
from vllm.v1.structured_output import StructuredOutputManager
|
||||
|
||||
|
||||
class TestReasoningStructuredOutput:
    """Reasoning-aware behavior of ``StructuredOutputManager``."""

    @pytest.fixture
    def mock_model_config(self):
        """ModelConfig stand-in that avoids any real tokenizer loading."""
        cfg = Mock(spec=ModelConfig)
        # Skip tokenizer init to avoid network calls.
        cfg.skip_tokenizer_init = True
        cfg.get_vocab_size = Mock(return_value=50000)
        # Attributes tokenizer initialization would otherwise look up.
        cfg.runner_type = "generate"
        cfg.tokenizer = "test-tokenizer"
        cfg.tokenizer_mode = "auto"
        cfg.trust_remote_code = False
        cfg.tokenizer_revision = None
        return cfg

    @pytest.fixture
    def mock_scheduler_config(self):
        """SchedulerConfig stand-in with a fixed sequence budget."""
        cfg = Mock(spec=SchedulerConfig)
        cfg.max_num_seqs = 128
        return cfg

    @pytest.fixture
    def mock_vllm_config(self, mock_model_config, mock_scheduler_config):
        """VllmConfig stand-in wiring the mocked sub-configs together."""
        cfg = Mock(spec=VllmConfig)
        cfg.model_config = mock_model_config
        cfg.scheduler_config = mock_scheduler_config
        cfg.structured_outputs_config = Mock()
        cfg.structured_outputs_config.reasoning_parser = None
        cfg.structured_outputs_config.enable_in_reasoning = False
        cfg.speculative_config = None
        return cfg

    @pytest.fixture
    def mock_reasoning_parser(self):
        """ReasoningParser stand-in reporting reasoning as not ended."""
        parser = Mock(spec=ReasoningParser)
        parser.is_reasoning_end = Mock(return_value=False)
        return parser

    @pytest.fixture
    def mock_request_with_structured_output(self):
        """Request stand-in carrying a live structured-output grammar."""
        req = Mock(spec=Request)
        req.structured_output_request = Mock()
        req.structured_output_request.reasoning_ended = None
        req.structured_output_request.grammar = Mock()
        req.structured_output_request.grammar.is_terminated = Mock(
            return_value=False
        )
        req.use_structured_output = True
        req.prompt_token_ids = [1, 2, 3, 4, 5]
        req.all_token_ids = [1, 2, 3, 4, 5, 6, 7, 8]
        req.num_computed_tokens = 5
        return req

    def test_should_fill_bitmask_with_enable_in_reasoning(
        self, mock_vllm_config, mock_request_with_structured_output
    ):
        """enable_in_reasoning=True forces the bitmask to always be filled."""
        mock_vllm_config.structured_outputs_config.enable_in_reasoning = True
        manager = StructuredOutputManager(mock_vllm_config)

        assert manager.should_fill_bitmask(mock_request_with_structured_output) is True

    def test_should_fill_bitmask_without_enable_in_reasoning(
        self,
        mock_vllm_config,
        mock_request_with_structured_output,
        mock_reasoning_parser,
    ):
        """With the flag off, the reasoner decides and the state is cached."""
        # The fixture default leaves enable_in_reasoning disabled.
        assert (
            mock_vllm_config.structured_outputs_config.enable_in_reasoning is False
        )

        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = mock_reasoning_parser
        mock_reasoning_parser.is_reasoning_end.return_value = False

        outcome = manager.should_fill_bitmask(mock_request_with_structured_output)

        # reasoning_ended is recorded on the request and its value returned.
        sor = mock_request_with_structured_output.structured_output_request
        assert sor.reasoning_ended is False
        assert outcome is False

    def test_should_fill_bitmask_no_reasoner(
        self, mock_vllm_config, mock_request_with_structured_output
    ):
        """With no reasoner configured, filling defaults to True."""
        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = None

        assert manager.should_fill_bitmask(mock_request_with_structured_output) is True

    def test_should_advance_with_enable_in_reasoning(
        self,
        mock_vllm_config,
        mock_request_with_structured_output,
        mock_reasoning_parser,
    ):
        """enable_in_reasoning=True means the grammar always advances."""
        mock_vllm_config.structured_outputs_config.enable_in_reasoning = True
        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = mock_reasoning_parser

        assert manager.should_advance(mock_request_with_structured_output) is True

    def test_should_advance_reasoning_not_ended(
        self,
        mock_vllm_config,
        mock_request_with_structured_output,
        mock_reasoning_parser,
    ):
        """No advance while the reasoner says reasoning is still going."""
        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = mock_reasoning_parser

        sor = mock_request_with_structured_output.structured_output_request
        sor.reasoning_ended = False
        mock_reasoning_parser.is_reasoning_end.return_value = False

        assert manager.should_advance(mock_request_with_structured_output) is False

    def test_should_advance_reasoning_just_ended(
        self,
        mock_vllm_config,
        mock_request_with_structured_output,
        mock_reasoning_parser,
    ):
        """The step where reasoning ends records the flag but does not advance."""
        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = mock_reasoning_parser

        sor = mock_request_with_structured_output.structured_output_request
        sor.reasoning_ended = False
        mock_reasoning_parser.is_reasoning_end.return_value = True

        outcome = manager.should_advance(mock_request_with_structured_output)

        # The transition is recorded, but this step still does not advance.
        assert sor.reasoning_ended is True
        assert outcome is False

    def test_should_advance_reasoning_already_ended(
        self,
        mock_vllm_config,
        mock_request_with_structured_output,
        mock_reasoning_parser,
    ):
        """Once reasoning has ended, the grammar advances normally."""
        manager = StructuredOutputManager(mock_vllm_config)
        manager.reasoner = mock_reasoning_parser

        sor = mock_request_with_structured_output.structured_output_request
        sor.reasoning_ended = True

        assert manager.should_advance(mock_request_with_structured_output) is True
|
||||
106
tests/v1/structured_output/test_utils.py
Normal file
106
tests/v1/structured_output/test_utils.py
Normal file
@@ -0,0 +1,106 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.v1.structured_output.backend_xgrammar import (
|
||||
has_xgrammar_unsupported_json_features,
|
||||
)
|
||||
|
||||
pytestmark = pytest.mark.cpu_test
|
||||
|
||||
|
||||
@pytest.fixture
def unsupported_string_schemas():
    """String schemas relying on features xgrammar cannot handle."""
    return [{"type": "string", "format": "non_existing_format"}]
|
||||
|
||||
|
||||
@pytest.fixture
def unsupported_integer_schemas():
    """Integer schemas relying on features xgrammar cannot handle."""
    return [{"type": "integer", "multipleOf": 120}]
|
||||
|
||||
|
||||
@pytest.fixture
def unsupported_number_schemas():
    """Number schemas relying on features xgrammar cannot handle."""
    return [{"type": "number", "multipleOf": 120}]
|
||||
|
||||
|
||||
@pytest.fixture
def unsupported_array_schemas():
    """Array schemas relying on features xgrammar cannot handle."""
    unsupported_keywords = [
        ("uniqueItems", True),
        ("contains", {"type": "string"}),
        ("minContains", 1),
        ("maxContains", 5),
    ]
    return [{"type": "array", key: value} for key, value in unsupported_keywords]
|
||||
|
||||
|
||||
@pytest.fixture
def unsupported_object_schemas():
    """Object schemas relying on features xgrammar cannot handle."""
    name_constrained = {"type": "object", "propertyNames": {"pattern": "^[a-z]+$"}}
    pattern_props = {"type": "object", "patternProperties": {"^S": {"type": "string"}}}
    return [name_constrained, pattern_props]
|
||||
|
||||
|
||||
@pytest.fixture
def supported_schema():
    """A rich JSON schema built only from xgrammar-supported features."""
    # Nested object sub-schema, assembled separately for readability.
    address = {
        "type": "object",
        "properties": {
            "street": {"type": "string"},
            "city": {"type": "string"},
        },
    }
    properties = {
        "name": {"type": "string"},
        "age": {"type": "integer"},
        "email": {"type": "string", "format": "email"},
        "status": {"type": "string"},
        "scores": {"type": "array", "items": {"type": "number"}},
        "car_type": {"type": "string", "enum": ["sedan", "suv", "truck"]},
        "car_brand": {"type": "string", "pattern": "^[a-zA-Z]+$"},
        "short_description": {"type": "string", "maxLength": 50},
        "mileage": {"type": "number", "minimum": 0, "maximum": 1000000},
        "model_year": {
            "type": "integer",
            "exclusiveMinimum": 1900,
            "exclusiveMaximum": 2100,
        },
        "long_description": {"type": "string", "minLength": 50, "maxLength": 2000},
        "address": address,
    }
    return {
        "type": "object",
        "properties": properties,
        "minProperties": 1,
        "maxProperties": 100,
    }
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "schema_type",
    [
        "unsupported_string_schemas",
        "unsupported_integer_schemas",
        "unsupported_number_schemas",
        "unsupported_array_schemas",
        "unsupported_object_schemas",
    ],
)
def test_unsupported_json_features_by_type(schema_type, request):
    """Every schema in each 'unsupported' fixture must be flagged."""
    # Resolve the fixture by name and check each schema it provides.
    for schema in request.getfixturevalue(schema_type):
        assert has_xgrammar_unsupported_json_features(schema), (
            f"Schema should be unsupported: {schema}"
        )
|
||||
|
||||
|
||||
def test_supported_json_features(supported_schema):
    """A schema using only supported features must not be flagged."""
    flagged = has_xgrammar_unsupported_json_features(supported_schema)
    assert not flagged, "Schema should be supported"
|
||||
Reference in New Issue
Block a user