Sync from v0.13
This commit is contained in:
0
tests/reasoning/__init__.py
Normal file
0
tests/reasoning/__init__.py
Normal file
421
tests/reasoning/test_base_thinking_reasoning_parser.py
Normal file
421
tests/reasoning/test_base_thinking_reasoning_parser.py
Normal file
@@ -0,0 +1,421 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction
|
||||
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
|
||||
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
|
||||
|
||||
|
||||
# Create a concrete test implementation of BaseThinkingReasoningParser
class TestThinkingReasoningParser(BaseThinkingReasoningParser):
    """Test implementation of BaseThinkingReasoningParser."""

    # Prevent pytest from trying to collect this helper as a test class:
    # its name starts with "Test" but it is a parser implementation whose
    # __init__ requires a tokenizer, which would trigger a collection warning.
    __test__ = False

    @property
    def start_token(self) -> str:
        """Token that opens a reasoning section."""
        return "<test:think>"

    @property
    def end_token(self) -> str:
        """Token that closes a reasoning section."""
        return "</test:think>"
||||
class TestThinkingReasoningParserAlt(BaseThinkingReasoningParser):
    """Alternative test implementation with different tokens."""

    # Prevent pytest from trying to collect this helper as a test class
    # (name starts with "Test" but it is not a test).
    __test__ = False

    @property
    def start_token(self) -> str:
        """Token that opens a reasoning section."""
        return "<alt:start>"

    @property
    def end_token(self) -> str:
        """Token that closes a reasoning section."""
        return "<alt:end>"
||||
# Use a test model
REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"


@pytest.fixture(scope="module")
def test_tokenizer():
    """Module-scoped tokenizer with the custom test think tokens registered."""
    tokenizer = AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
    # Register the start/end tokens used by the test parsers, adding only
    # those not already present in the vocabulary.
    wanted = ["<test:think>", "</test:think>", "<alt:start>", "<alt:end>"]
    vocab = tokenizer.get_vocab()
    missing = [tok for tok in wanted if tok not in vocab]
    if missing:
        tokenizer.add_tokens(missing)
    return tokenizer
||||
|
||||
class TestBaseThinkingReasoningParserInit:
    """Initialization behavior of BaseThinkingReasoningParser subclasses."""

    def test_successful_initialization(self, test_tokenizer):
        """A parser whose tokens exist in the vocabulary initializes cleanly."""
        p = TestThinkingReasoningParser(test_tokenizer)
        assert p.start_token == "<test:think>"
        assert p.end_token == "</test:think>"
        assert p.start_token_id is not None
        assert p.end_token_id is not None

    def test_initialization_with_missing_tokenizer(self):
        """Constructing without a tokenizer raises ValueError."""
        with pytest.raises(ValueError, match="model tokenizer must be passed"):
            TestThinkingReasoningParser(None)

    def test_initialization_with_missing_tokens(self, test_tokenizer):
        """Tokens absent from the vocabulary cause a RuntimeError."""

        class _AbsentTokenParser(BaseThinkingReasoningParser):
            @property
            def start_token(self) -> str:
                return "<missing:start>"

            @property
            def end_token(self) -> str:
                return "<missing:end>"

        with pytest.raises(
            RuntimeError, match="could not locate think start/end tokens"
        ):
            _AbsentTokenParser(test_tokenizer)

    def test_initialization_with_empty_tokens(self, test_tokenizer):
        """Empty token strings cause a ValueError."""

        class _BlankTokenParser(BaseThinkingReasoningParser):
            @property
            def start_token(self) -> str:
                return ""

            @property
            def end_token(self) -> str:
                return ""

        with pytest.raises(
            ValueError, match="start_token and end_token must be defined"
        ):
            _BlankTokenParser(test_tokenizer)
|
||||
class TestBaseThinkingReasoningParserMethods:
    """Behavior of the token-id-level helper methods."""

    def test_is_reasoning_end(self, test_tokenizer):
        """is_reasoning_end reflects whether the last think block is closed."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        start_id, end_id = parser.start_token_id, parser.end_token_id

        # End token present -> reasoning is over.
        assert parser.is_reasoning_end([1, 2, end_id, 4]) is True
        # No end token -> still reasoning.
        assert parser.is_reasoning_end([1, 2, 3, 4]) is False
        # Nothing generated yet.
        assert parser.is_reasoning_end([]) is False

        # Interleaved thinking: the most recent think block decides.
        assert parser.is_reasoning_end([1, start_id, 2, end_id]) is True
        assert parser.is_reasoning_end([1, start_id, 2, 3]) is False
        reopened = [1, start_id, 2, end_id, 2, 2, start_id]
        assert parser.is_reasoning_end(reopened) is False

    def test_is_reasoning_end_streaming(self, test_tokenizer):
        """is_reasoning_end_streaming combines history with the delta."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        start_id, end_id = parser.start_token_id, parser.end_token_id
        check = parser.is_reasoning_end_streaming

        assert check([1, 2, end_id], [end_id]) is True
        assert check([1, 2, 3, 4], [4]) is False
        assert check([], []) is False
        assert check([1, start_id, 2, end_id], [end_id]) is True
        assert check([1, start_id, 2, 3], [3]) is False
        assert check([1, start_id, 2, end_id, 2, start_id, 2], [2]) is False
        assert check([1, start_id, 2, end_id, 2, 2], [2]) is False

    def test_extract_content_ids(self, test_tokenizer):
        """extract_content_ids returns only ids after the end token."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        end_id = parser.end_token_id

        # End token in the middle: everything after it is content.
        assert parser.extract_content_ids([1, 2, end_id, 4, 5]) == [4, 5]
        # End token last: no content yet.
        assert parser.extract_content_ids([1, 2, 3, end_id]) == []
        # No end token at all: no content.
        assert parser.extract_content_ids([1, 2, 3, 4]) == []
        # NOTE(review): duplicate of the "end token last" case above,
        # preserved as-is to keep behavior identical.
        assert parser.extract_content_ids([1, 2, 3, end_id]) == []
|
||||
class TestBaseThinkingReasoningParserExtraction:
    """Non-streaming extraction of reasoning and content."""

    @staticmethod
    def _extract(parser, model_output):
        # Helper: build a minimal chat request and run extraction on it.
        request = ChatCompletionRequest(messages=[], model="test-model")
        return parser.extract_reasoning(model_output, request)

    def test_extract_reasoning_with_both_tokens(self, test_tokenizer):
        """Both start and end tokens present."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        reasoning, content = self._extract(
            parser, "<test:think>This is reasoning</test:think>This is content"
        )
        assert reasoning == "This is reasoning"
        assert content == "This is content"

    def test_extract_reasoning_only_end_token(self, test_tokenizer):
        """Only the end token present."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        reasoning, content = self._extract(
            parser, "This is reasoning</test:think>This is content"
        )
        assert reasoning == "This is reasoning"
        assert content == "This is content"

    def test_extract_reasoning_no_end_token(self, test_tokenizer):
        """No end token: everything counts as reasoning."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        reasoning, content = self._extract(parser, "This is just content")
        assert reasoning == "This is just content"
        assert content is None

    def test_extract_reasoning_empty_output(self, test_tokenizer):
        """Empty model output."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        reasoning, content = self._extract(parser, "")
        assert reasoning == ""
        assert content is None

    def test_extract_reasoning_only_tokens(self, test_tokenizer):
        """Tokens with no text between or after them."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        reasoning, content = self._extract(parser, "<test:think></test:think>")
        assert reasoning == ""
        assert content is None
|
||||
class TestBaseThinkingReasoningParserStreaming:
    """Streaming behavior of BaseThinkingReasoningParser."""

    @pytest.mark.parametrize("streaming", [True, False])
    def test_simple_reasoning_extraction(self, test_tokenizer, streaming):
        """Basic extraction in both streaming and non-streaming modes."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        chunks = [
            "<test:think>",
            "Some ",
            "reasoning ",
            "content",
            "</test:think>",
            "Final ",
            "answer",
        ]
        reasoning, content = run_reasoning_extraction(
            parser, chunks, streaming=streaming
        )
        assert reasoning == "Some reasoning content"
        assert content == "Final answer"

    def test_streaming_with_incremental_deltas(self, test_tokenizer):
        """Streaming with small incremental deltas."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        chunks = [
            "<test:think>",
            "Some ",
            "reasoning ",
            "content",
            "</test:think>",
            "Final ",
            "answer",
        ]
        reasoning, content = run_reasoning_extraction(parser, chunks, streaming=True)
        assert reasoning == "Some reasoning content"
        assert content == "Final answer"

    def test_streaming_with_start_token(self, test_tokenizer):
        """Streaming where the start token is part of the stream."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        chunks = ["<test:think>", "Some ", "reasoning", "</test:think>", "Answer"]
        reasoning, content = run_reasoning_extraction(parser, chunks, streaming=True)
        assert reasoning == "Some reasoning"
        assert content == "Answer"

    def test_streaming_no_end_token(self, test_tokenizer):
        """Streaming that never reaches the end token."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        chunks = ["<test:think>", "Some ", "reasoning ", "without ", "end"]
        reasoning, content = run_reasoning_extraction(parser, chunks, streaming=True)
        assert reasoning == "Some reasoning without end"
        assert content is None

    def test_streaming_only_end_token(self, test_tokenizer):
        """Streaming where content follows the end token."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        chunks = ["<test:think>", "Reasoning ", "content", "</test:think>", "Final"]
        reasoning, content = run_reasoning_extraction(parser, chunks, streaming=True)
        assert reasoning == "Reasoning content"
        assert content == "Final"
|
||||
class TestBaseThinkingReasoningParserMultipleImplementations:
    """Multiple subclasses with distinct tokens operate independently."""

    def test_different_token_implementations(self, test_tokenizer):
        """Each implementation honors its own start/end tokens."""
        primary = TestThinkingReasoningParser(test_tokenizer)
        alternate = TestThinkingReasoningParserAlt(test_tokenizer)

        # Primary parser splits on </test:think>.
        reasoning, content = run_reasoning_extraction(
            primary, ["Reasoning1</test:think>Content1"]
        )
        assert reasoning == "Reasoning1"
        assert content == "Content1"

        # Alternate parser splits on <alt:end>.
        reasoning, content = run_reasoning_extraction(
            alternate, ["Reasoning2<alt:end>Content2"]
        )
        assert reasoning == "Reasoning2"
        assert content == "Content2"

        # The two parsers must not share tokens or token ids.
        assert primary.start_token != alternate.start_token
        assert primary.end_token != alternate.end_token
        assert primary.start_token_id != alternate.start_token_id
        assert primary.end_token_id != alternate.end_token_id
|
||||
class TestBaseThinkingReasoningParserEdgeCases:
    """Edge cases and unusual token patterns."""

    def test_multiple_end_tokens(self, test_tokenizer):
        """Only the first end token terminates the reasoning section."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        reasoning, content = run_reasoning_extraction(
            parser, ["First</test:think>Middle</test:think>Last"]
        )
        assert reasoning == "First"
        assert content == "Middle</test:think>Last"

    def test_nested_tokens(self, test_tokenizer):
        """A repeated start token inside reasoning is treated as plain text."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        reasoning, content = run_reasoning_extraction(
            parser, ["<test:think>Outer<test:think>Inner</test:think>Content"]
        )
        assert reasoning == "Outer<test:think>Inner"
        assert content == "Content"

    def test_malformed_tokens(self, test_tokenizer):
        """Near-miss token strings do not trigger reasoning extraction."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        reasoning, content = run_reasoning_extraction(
            parser, ["<test:thinking>Not a real token</test:thinking>Content"]
        )
        assert reasoning == ("<test:thinking>Not a real token</test:thinking>Content")
        assert content is None
||||
288
tests/reasoning/test_deepseekr1_reasoning_parser.py
Normal file
288
tests/reasoning/test_deepseekr1_reasoning_parser.py
Normal file
@@ -0,0 +1,288 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
|
||||
# Registry name of the parser under test and the think-token pair it uses.
parser_name = "deepseek_r1"
start_token = "<think>"
end_token = "</think>"

REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"


@pytest.fixture(scope="module")
def deepseek_r1_qwen_tokenizer():
    """Module-scoped HF tokenizer for the DeepSeek-R1 distilled Qwen model."""
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
|
||||
# Each case maps a raw model output to the expected parsed reasoning text,
# content text, and whether the reasoning section is considered finished.
SIMPLE_REASONING = {
    "output": "This is a reasoning section</think>This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
COMPLETE_REASONING = {
    "output": "This is a reasoning section</think>",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": True,
}
NO_CONTENT = {
    "output": "This is content",
    "reasoning": "This is content",
    "content": None,
    "is_reasoning_end": False,
}
NO_REASONING_STREAMING = {
    "output": "This is a reasoning section",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": False,
}
MULTIPLE_LINES = {
    "output": "This\nThat</think>This is the rest\nThat",
    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
    "is_reasoning_end": True,
}
SHORTEST_REASONING_NO_STREAMING = {
    "output": "</think>This is the rest",
    "reasoning": "",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
SHORTEST_REASONING = {
    "output": "</think>This is the rest",
    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": True,
}
REASONING_WITH_THINK = {
    "output": "<think>This is a reasoning section</think>This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
COMPLETE_REASONING_WITH_THINK = {
    "output": "<think>This is a reasoning section</think>",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": True,
}
MULTIPLE_LINES_WITH_THINK = {
    "output": "<think>This\nThat</think>This is the rest\nThat",
    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
    "is_reasoning_end": True,
}
SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
    "output": "</think>This is the rest",
    "reasoning": "",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
SHORTEST_REASONING_WITH_THINK = {
    "output": "</think>This is the rest",
    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": True,
}
THINK_NO_END = {
    "output": "<think>This is a reasoning section",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": False,
}
EMPTY = {
    "output": "",
    "reasoning": "",
    "content": None,
    "is_reasoning_end": False,
}
EMPTY_STREAMING = {
    "output": "",
    "reasoning": None,
    "content": None,
    "is_reasoning_end": False,
}
NEW_LINE = {
    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
    "reasoning": "This is a reasoning section",
    "content": "\nThis is the rest",
    "is_reasoning_end": True,
}
# Streaming cannot handle new lines at the beginning of the output
# because we need to support <think>...</think> and </think>...
# We cannot know if the text before <think> is reasoning content
# or not.
NEW_LINE_STREAMING = {
    "output": "\n<think>This is a reasoning section</think>\nThis is the rest",
    "reasoning": "\nThis is a reasoning section",
    "content": "\nThis is the rest",
    "is_reasoning_end": True,
}
||||
# (streaming?, expected-case) pairs; ids mirror the case constants.
TEST_CASES = [
    pytest.param(False, SIMPLE_REASONING, id="simple_reasoning"),
    pytest.param(True, SIMPLE_REASONING, id="simple_reasoning_streaming"),
    pytest.param(False, COMPLETE_REASONING, id="complete_reasoning"),
    pytest.param(True, COMPLETE_REASONING, id="complete_reasoning_streaming"),
    pytest.param(False, NO_CONTENT, id="no_content_token"),
    pytest.param(True, NO_REASONING_STREAMING, id="no_reasoning_token_streaming"),
    pytest.param(False, MULTIPLE_LINES, id="multiple_lines"),
    pytest.param(True, MULTIPLE_LINES, id="multiple_lines_streaming"),
    pytest.param(True, SHORTEST_REASONING, id="shortest"),
    pytest.param(False, SHORTEST_REASONING_NO_STREAMING, id="shortest_streaming"),
    pytest.param(False, REASONING_WITH_THINK, id="reasoning_with_think"),
    pytest.param(True, REASONING_WITH_THINK, id="reasoning_with_think_streaming"),
    pytest.param(
        False, COMPLETE_REASONING_WITH_THINK, id="complete_reasoning_with_think"
    ),
    pytest.param(
        True,
        COMPLETE_REASONING_WITH_THINK,
        id="complete_reasoning_with_think_streaming",
    ),
    pytest.param(False, MULTIPLE_LINES_WITH_THINK, id="multiple_lines_with_think"),
    pytest.param(
        True, MULTIPLE_LINES_WITH_THINK, id="multiple_lines_with_think_streaming"
    ),
    pytest.param(
        False, SHORTEST_REASONING_NO_STREAMING_WITH_THINK, id="shortest_with_think"
    ),
    pytest.param(
        True, SHORTEST_REASONING_WITH_THINK, id="shortest_with_think_streaming"
    ),
    pytest.param(False, THINK_NO_END, id="think_no_end"),
    pytest.param(True, THINK_NO_END, id="think_no_end_streaming"),
    pytest.param(False, EMPTY, id="empty"),
    pytest.param(True, EMPTY_STREAMING, id="empty_streaming"),
    pytest.param(False, NEW_LINE, id="new_line"),
    pytest.param(True, NEW_LINE_STREAMING, id="new_line_streaming"),
]
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    deepseek_r1_qwen_tokenizer,
):
    """End-to-end check of the deepseek_r1 reasoning parser.

    Verifies extracted reasoning/content, is_reasoning_end, and
    extract_content_ids against the expectations in ``param_dict``.
    """
    output = deepseek_r1_qwen_tokenizer.tokenize(param_dict["output"])
    # decode everything to tokens
    output_tokens: list[str] = [
        deepseek_r1_qwen_tokenizer.convert_tokens_to_string([token]) for token in output
    ]
    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        deepseek_r1_qwen_tokenizer
    )

    reasoning, content = run_reasoning_extraction(
        parser, output_tokens, streaming=streaming
    )

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]

    # Test is_reasoning_end
    output_ids = deepseek_r1_qwen_tokenizer.convert_tokens_to_ids(output)
    is_reasoning_end = parser.is_reasoning_end(output_ids)
    assert is_reasoning_end == param_dict["is_reasoning_end"]

    # Test extract_content
    if param_dict["content"] is not None:
        content = parser.extract_content_ids(output_ids)
        assert content == deepseek_r1_qwen_tokenizer.convert_tokens_to_ids(
            deepseek_r1_qwen_tokenizer.tokenize(param_dict["content"])
        )
    else:
        # FIX: pass token *ids* (output_ids), matching the branch above.
        # The original passed `output` (token strings), which only worked
        # by accident because no string ever equals the end token id.
        content = parser.extract_content_ids(output_ids)
        assert content == []
||||
75
tests/reasoning/test_deepseekv3_reasoning_parser.py
Normal file
75
tests/reasoning/test_deepseekv3_reasoning_parser.py
Normal file
@@ -0,0 +1,75 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
|
||||
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
|
||||
from vllm.reasoning.deepseek_v3_reasoning_parser import DeepSeekV3ReasoningParser
|
||||
from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser
|
||||
|
||||
REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-V3.1"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
def tokenizer():
    """Module-scoped HF tokenizer for the DeepSeek-V3.1 model."""
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "thinking,expected_parser_type",
    [
        (True, DeepSeekR1ReasoningParser),
        (False, IdentityReasoningParser),
    ],
)
def test_parser_selection(tokenizer, thinking, expected_parser_type):
    """The V3 parser delegates to R1 parsing when `thinking` is enabled,
    and to the identity parser otherwise."""
    parser = DeepSeekV3ReasoningParser(
        tokenizer, chat_template_kwargs={"thinking": thinking}
    )

    # Inspect the delegate chosen at construction time.
    assert isinstance(parser._parser, expected_parser_type)
|
||||
|
||||
def test_identity_reasoning_parser_basic(tokenizer):
    """IdentityReasoningParser treats all output as content, never reasoning."""
    parser = IdentityReasoningParser(tokenizer)

    input_text = "This is some output"
    input_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize(input_text))

    # Reasoning is always considered finished.
    assert parser.is_reasoning_end(input_ids) is True
    assert parser.is_reasoning_end_streaming(input_ids, input_ids) is True

    # Every token id counts as content.
    assert parser.extract_content_ids(input_ids) == input_ids

    # Non-streaming extraction: no reasoning, full text as content.
    request = ChatCompletionRequest(model="test-model", messages=[], temperature=1.0)
    reasoning, content = parser.extract_reasoning(input_text, request)
    assert reasoning is None
    assert content == input_text

    # Streaming extraction returns a DeltaMessage carrying the delta text...
    delta = parser.extract_reasoning_streaming(
        previous_text="",
        current_text="Hello world",
        delta_text="Hello world",
        previous_token_ids=[],
        current_token_ids=input_ids,
        delta_token_ids=input_ids,
    )
    assert isinstance(delta, DeltaMessage)
    assert delta.content == "Hello world"

    # ...and None when the delta text is empty.
    empty_delta = parser.extract_reasoning_streaming(
        previous_text="Hello world",
        current_text="Hello world",
        delta_text="",
        previous_token_ids=input_ids,
        current_token_ids=input_ids,
        delta_token_ids=[],
    )
    assert empty_delta is None
||||
124
tests/reasoning/test_ernie45_reasoning_parser.py
Normal file
124
tests/reasoning/test_ernie45_reasoning_parser.py
Normal file
@@ -0,0 +1,124 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
|
||||
# Registry name of the parser under test.
parser_name = "ernie45"

REASONING_MODEL_NAME = "baidu/ERNIE-4.5-21B-A3B-Thinking"


@pytest.fixture(scope="module")
def ernie45_tokenizer():
    """Module-scoped HF tokenizer for the ERNIE-4.5 thinking model."""
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
||||
|
||||
|
||||
# With </think>, non-streaming
WITH_THINK = {
    "output": "abc</think>def",
    "reasoning": "abc",
    "content": "def",
}
# With </think>, streaming
WITH_THINK_STREAM = {
    "output": "abc</think>def",
    "reasoning": "abc",
    "content": "def",
}
# without </think>, all is reasoning
WITHOUT_THINK = {
    "output": "abc",
    "reasoning": "abc",
    "content": None,
}
# without </think>, all is reasoning
WITHOUT_THINK_STREAM = {
    "output": "abc",
    "reasoning": "abc",
    "content": None,
}

COMPLETE_REASONING = {
    "output": "abc</think>",
    "reasoning": "abc",
    "content": None,
}
MULTILINE_REASONING = {
    "output": "abc\nABC</think>def\nDEF",
    "reasoning": "abc\nABC",
    "content": "def\nDEF",
}
||||
|
||||
# (streaming?, expected-case) pairs for the ernie45 parser.
TEST_CASES = [
    pytest.param(False, WITH_THINK, id="with_think"),
    pytest.param(True, WITH_THINK_STREAM, id="with_think_stream"),
    pytest.param(False, WITHOUT_THINK, id="without_think"),
    pytest.param(True, WITHOUT_THINK_STREAM, id="without_think_stream"),
    pytest.param(False, COMPLETE_REASONING, id="complete_reasoning"),
    pytest.param(True, COMPLETE_REASONING, id="complete_reasoning_stream"),
    pytest.param(False, MULTILINE_REASONING, id="multiline_reasoning"),
    pytest.param(True, MULTILINE_REASONING, id="multiline_reasoning_stream"),
]
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    ernie45_tokenizer,
):
    """End-to-end check of the ernie45 reasoning parser against each case."""
    output = ernie45_tokenizer.tokenize(param_dict["output"])
    # Re-decode tokens to strings, skipping any that decode to "" so the
    # streaming simulation only feeds non-empty deltas.
    output_tokens: list[str] = []
    for token in output:
        one_token = ernie45_tokenizer.convert_tokens_to_string([token])
        if one_token:
            output_tokens.append(one_token)

    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        ernie45_tokenizer
    )

    reasoning, content = run_reasoning_extraction(
        parser, output_tokens, streaming=streaming
    )

    # FIX: removed a stray debug `print()` left over from development.
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
||||
205
tests/reasoning/test_glm4_moe_reasoning_parser.py
Normal file
205
tests/reasoning/test_glm4_moe_reasoning_parser.py
Normal file
@@ -0,0 +1,205 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
|
||||
# Registry name of the parser under test and the think-token pair it uses.
parser_name = "glm45"
start_token = "<think>"
end_token = "</think>"

REASONING_MODEL_NAME = "zai-org/GLM-4.5"


@pytest.fixture(scope="module")
def glm45_tokenizer():
    """Module-scoped HF tokenizer for the GLM-4.5 model."""
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
|
||||
|
||||
WITH_THINK = {
|
||||
"output": "<think>This is a reasoning section</think>This is the rest",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
|
||||
WITH_THINK_STREAM = {
|
||||
"output": "<think>This is a reasoning section</think>This is the rest",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
|
||||
WITHOUT_THINK = {
|
||||
"output": "This is the rest",
|
||||
"reasoning": None,
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
|
||||
WITHOUT_THINK_STREAM = {
|
||||
"output": "This is the rest",
|
||||
"reasoning": None,
|
||||
"content": "This is the rest",
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
|
||||
COMPLETE_REASONING = {
|
||||
"output": "<think>This is a reasoning section</think>",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": None,
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
MULTILINE_REASONING = {
|
||||
"output": "<think>This is a reasoning\nsection</think>This is the rest\nThat",
|
||||
"reasoning": "This is a reasoning\nsection",
|
||||
"content": "This is the rest\nThat",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
ONLY_OPEN_TAG = {
|
||||
"output": "<think>This is a reasoning section",
|
||||
"reasoning": None,
|
||||
"content": "<think>This is a reasoning section",
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
|
||||
ONLY_OPEN_TAG_STREAM = {
|
||||
"output": "<think>This is a reasoning section",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": None,
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
|
||||
TEST_CASES = [
|
||||
pytest.param(
|
||||
False,
|
||||
WITH_THINK,
|
||||
id="with_think",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
WITH_THINK_STREAM,
|
||||
id="with_think_stream",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
WITHOUT_THINK,
|
||||
id="without_think",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
WITHOUT_THINK_STREAM,
|
||||
id="without_think_stream",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
COMPLETE_REASONING,
|
||||
id="complete_reasoning",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
COMPLETE_REASONING,
|
||||
id="complete_reasoning_stream",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
MULTILINE_REASONING,
|
||||
id="multiline_reasoning",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
MULTILINE_REASONING,
|
||||
id="multiline_reasoning_stream",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
ONLY_OPEN_TAG,
|
||||
id="only_open_tag",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
ONLY_OPEN_TAG_STREAM,
|
||||
id="only_open_tag_stream",
|
||||
),
|
||||
]
|
||||
|
||||
STILL_REASONING_PROMPT = """[gMASK]<sop><|system|>
|
||||
You are a helpful assistant.<|user|>
|
||||
What is the capital of France?<|assistant|>
|
||||
<think>The user is asking for the capital of"""
|
||||
|
||||
DONE_REASONING_PROMPT = """[gMASK]<sop><|system|>
|
||||
You are a helpful assistant.<|user|>
|
||||
What is the capital of France?<|assistant|>
|
||||
<think>The user is asking for the capital of France.</think>
|
||||
The capital of France is Paris."""
|
||||
|
||||
MULTI_TURN_STILL_REASONING_PROMPT = """[gMASK]<sop><|system|>
|
||||
You are a helpful assistant.<|user|>
|
||||
What is the capital of France?<|assistant|>
|
||||
<think></think>
|
||||
The capital of France is Paris.<|user|>
|
||||
What about Chile?<|assistant|>
|
||||
<think>The user is asking for the capital of"""
|
||||
|
||||
MULTI_TURN_DONE_REASONING_PROMPT = """[gMASK]<sop><|system|>
|
||||
You are a helpful assistant.<|user|>
|
||||
What is the capital of France?<|assistant|>
|
||||
<think></think>
|
||||
The capital of France is Paris.<|user|>
|
||||
What about Chile?<|assistant|>
|
||||
<think>The user is asking for the capital of Chile.</think>
|
||||
The capital of Chile is Santiago."""
|
||||
|
||||
REASONING_END_TEST_CASES = [
|
||||
pytest.param(STILL_REASONING_PROMPT, False, id="still_reasoning"),
|
||||
pytest.param(DONE_REASONING_PROMPT, True, id="done_reasoning"),
|
||||
pytest.param(
|
||||
MULTI_TURN_STILL_REASONING_PROMPT, False, id="multi_turn_still_reasoning"
|
||||
),
|
||||
pytest.param(
|
||||
MULTI_TURN_DONE_REASONING_PROMPT, True, id="multi_turn_done_reasoning"
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    glm45_tokenizer,
):
    """Check reasoning extraction and end-of-reasoning detection for glm45."""
    tokens = glm45_tokenizer.tokenize(param_dict["output"])
    # Map each token back to its surface string for the extraction helper.
    token_strings: list[str] = []
    for tok in tokens:
        token_strings.append(glm45_tokenizer.convert_tokens_to_string([tok]))
    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(glm45_tokenizer)

    reasoning, content = run_reasoning_extraction(
        parser, token_strings, streaming=streaming
    )

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]

    # The parser must also judge end-of-reasoning from raw token ids.
    token_ids = glm45_tokenizer.convert_tokens_to_ids(tokens)
    assert parser.is_reasoning_end(token_ids) == param_dict["is_reasoning_end"]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("prompt, is_reasoning_end", REASONING_END_TEST_CASES)
def test_is_reasoning_end_full_prompt(
    prompt: str, is_reasoning_end: bool, glm45_tokenizer
):
    """is_reasoning_end must be judged on the last assistant turn of a prompt."""
    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(glm45_tokenizer)
    token_ids = glm45_tokenizer.convert_tokens_to_ids(
        glm45_tokenizer.tokenize(prompt)
    )
    assert parser.is_reasoning_end(token_ids) == is_reasoning_end
|
||||
127
tests/reasoning/test_gptoss_reasoning_parser.py
Normal file
127
tests/reasoning/test_gptoss_reasoning_parser.py
Normal file
@@ -0,0 +1,127 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from vllm.reasoning import ReasoningParser
|
||||
from vllm.reasoning.gptoss_reasoning_parser import GptOssReasoningParser
|
||||
|
||||
REASONING_MODEL_NAME = "openai/gpt-oss-120b"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def gpt_oss_tokenizer():
|
||||
return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
|
||||
|
||||
|
||||
USER_MESSAGE_START = "<|start|>user<|message|>"
|
||||
REASONING_SECTION_START = "<|end|><|start|>assistant<|channel|>analysis<|message|>"
|
||||
ASSISTANT_CONTENT_START_PREFIX = "<|end|><|start|>assistant<|channel|>final"
|
||||
ASSISTANT_CONTENT_START_SUFFIX = "<|message|>"
|
||||
ASSISTANT_CONTENT_START = (
|
||||
ASSISTANT_CONTENT_START_PREFIX + ASSISTANT_CONTENT_START_SUFFIX
|
||||
)
|
||||
|
||||
BASIC_CONTENT = {
|
||||
"output": REASONING_SECTION_START
|
||||
+ "This is reasoning"
|
||||
+ ASSISTANT_CONTENT_START
|
||||
+ "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
|
||||
BASIC_REASONING_ONLY = {
|
||||
"output": REASONING_SECTION_START + "This is reasoning" + "<|end|>",
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
BASIC_NO_REASONING_NO_ASSISTANT = {
|
||||
"output": USER_MESSAGE_START + "This is a user message",
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
|
||||
# Edge-case where the model omits the assistant tag entirely.
|
||||
BASIC_NO_REASONING_ASSISTANT = {
|
||||
"output": USER_MESSAGE_START + "This is a user message<|end|><|channel|>final",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
|
||||
COMPLEX_CONTENT_INCOMPLETE_PREFIX_ONLY = {
|
||||
"output": REASONING_SECTION_START
|
||||
+ "This is reasoning"
|
||||
+ ASSISTANT_CONTENT_START_PREFIX,
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
|
||||
COMPLEX_CONTENT_SUFFIX_ONLY = {
|
||||
"output": REASONING_SECTION_START
|
||||
+ "This is reasoning"
|
||||
+ ASSISTANT_CONTENT_START_SUFFIX,
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
|
||||
COMPLEX_CONTENT_1_NO_SUFFIX = {
|
||||
"output": REASONING_SECTION_START
|
||||
+ "This is reasoning"
|
||||
+ ASSISTANT_CONTENT_START_PREFIX
|
||||
+ "<|constrain|> JSON ",
|
||||
"is_reasoning_end": False,
|
||||
}
|
||||
|
||||
COMPLEX_CONTENT_1 = {
|
||||
"output": REASONING_SECTION_START
|
||||
+ "This is reasoning"
|
||||
+ ASSISTANT_CONTENT_START_PREFIX
|
||||
+ "<|constrain|> JSON "
|
||||
+ ASSISTANT_CONTENT_START_SUFFIX,
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
|
||||
COMPLEX_CONTENT_1_WITH_CONTENT = {
|
||||
"output": REASONING_SECTION_START
|
||||
+ "This is reasoning"
|
||||
+ ASSISTANT_CONTENT_START_PREFIX
|
||||
+ "<|constrain|> JSON "
|
||||
+ ASSISTANT_CONTENT_START_SUFFIX
|
||||
+ "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
|
||||
COMPLEX_CONTENT_2 = {
|
||||
"output": REASONING_SECTION_START
|
||||
+ "This is reasoning"
|
||||
+ ASSISTANT_CONTENT_START_PREFIX
|
||||
+ "<|constrain|>ReplyAction "
|
||||
+ ASSISTANT_CONTENT_START_SUFFIX
|
||||
+ "This is the rest",
|
||||
"is_reasoning_end": True,
|
||||
}
|
||||
|
||||
TEST_CASES = [
|
||||
BASIC_CONTENT,
|
||||
BASIC_REASONING_ONLY,
|
||||
COMPLEX_CONTENT_INCOMPLETE_PREFIX_ONLY,
|
||||
COMPLEX_CONTENT_SUFFIX_ONLY,
|
||||
COMPLEX_CONTENT_1_NO_SUFFIX,
|
||||
COMPLEX_CONTENT_1,
|
||||
COMPLEX_CONTENT_1_WITH_CONTENT,
|
||||
COMPLEX_CONTENT_2,
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "output, is_reasoning_end",
    [(t["output"], t["is_reasoning_end"]) for t in TEST_CASES],
)
def test_gptoss_is_reasoning_end(
    output,
    is_reasoning_end,
    gpt_oss_tokenizer,
):
    """The gpt-oss parser should detect when the final channel has opened."""
    parser: ReasoningParser = GptOssReasoningParser(gpt_oss_tokenizer)

    # Tokenize the raw channel-tagged output and check the id-level probe.
    tokens = gpt_oss_tokenizer.tokenize(output)
    token_ids = gpt_oss_tokenizer.convert_tokens_to_ids(tokens)
    assert parser.is_reasoning_end(token_ids) == is_reasoning_end
|
||||
344
tests/reasoning/test_granite_reasoning_parser.py
Normal file
344
tests/reasoning/test_granite_reasoning_parser.py
Normal file
@@ -0,0 +1,344 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import DeltaMessage, run_reasoning_extraction
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
|
||||
parser_name = "granite"
|
||||
START_REASONING = "Here is my thought process:"
|
||||
START_RESPONSE = "Here is my response:"
|
||||
|
||||
SIMPLE_REASONING = {
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", # noqa: E501
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
}
|
||||
COMPLETE_REASONING = {
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": None,
|
||||
}
|
||||
NO_REASONING = {
|
||||
"output": "This is content",
|
||||
"reasoning": None,
|
||||
"content": "This is content",
|
||||
}
|
||||
MULTIPLE_LINES = {
|
||||
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"reasoning": "This\nThat",
|
||||
"content": "This is the rest\nThat",
|
||||
}
|
||||
REASONING_WITH_THINK = {
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", # noqa: E501
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
}
|
||||
COMPLETE_REASONING_WITH_THINK = {
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": None,
|
||||
}
|
||||
MULTIPLE_LINES_WITH_THINK = {
|
||||
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"reasoning": "This\nThat",
|
||||
"content": "This is the rest\nThat",
|
||||
}
|
||||
|
||||
TEST_CASES = [
|
||||
pytest.param(
|
||||
False,
|
||||
SIMPLE_REASONING,
|
||||
id="simple_reasoning",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
COMPLETE_REASONING,
|
||||
id="complete_reasoning",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
NO_REASONING,
|
||||
id="no_reasoning",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
MULTIPLE_LINES,
|
||||
id="multiple_lines",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
REASONING_WITH_THINK,
|
||||
id="reasoning_with_think",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
COMPLETE_REASONING_WITH_THINK,
|
||||
id="complete_reasoning_with_think",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
MULTIPLE_LINES_WITH_THINK,
|
||||
id="multiple_lines_with_think",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
SIMPLE_REASONING,
|
||||
id="simple_reasoning_streaming",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
COMPLETE_REASONING,
|
||||
id="complete_reasoning_streaming",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
NO_REASONING,
|
||||
id="no_reasoning_streaming",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
MULTIPLE_LINES,
|
||||
id="multiple_lines_streaming",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
REASONING_WITH_THINK,
|
||||
id="reasoning_with_think_streaming",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
COMPLETE_REASONING_WITH_THINK,
|
||||
id="complete_reasoning_with_think_streaming",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
MULTIPLE_LINES_WITH_THINK,
|
||||
id="multiple_lines_with_think_streaming",
|
||||
),
|
||||
]
|
||||
|
||||
# Global tokenizer initialization to avoid repeated loading
|
||||
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
):
    """Extract reasoning/content with the granite parser and compare."""
    tokens = tokenizer.tokenize(param_dict["output"])
    # Convert every token back to its textual form for the helper.
    token_strings: list[str] = [
        tokenizer.convert_tokens_to_string([tok]) for tok in tokens
    ]
    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(tokenizer)

    reasoning, content = run_reasoning_extraction(
        parser, token_strings, streaming=streaming
    )

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
|
||||
|
||||
|
||||
# Additional tests for verifying the correctness of granite streaming; this
|
||||
# is complicated because granite uses multiple tokens to indicate when thinking
|
||||
# is starting / when it's starting its response, so skipping special tokens
|
||||
# is awkward.
|
||||
|
||||
### Handling the start of reasoning
|
||||
STREAMING_1 = {
|
||||
"previous_text": None,
|
||||
"current_text": "Here",
|
||||
"delta_text": "Here",
|
||||
"reasoning": None,
|
||||
"content": None,
|
||||
}
|
||||
# When we fail, we should give what was previously being silenced first
|
||||
STREAMING_2 = {
|
||||
"previous_text": "Here is my thought",
|
||||
"current_text": "Here is my thought failure",
|
||||
"delta_text": " failure",
|
||||
"reasoning": None,
|
||||
"content": "Here is my thought failure",
|
||||
}
|
||||
# But then after the first one, we should only add the delta text to content
|
||||
STREAMING_3 = {
|
||||
"previous_text": "Here wrong",
|
||||
"current_text": " words",
|
||||
"delta_text": " Here wrong words",
|
||||
"reasoning": None,
|
||||
"content": " words",
|
||||
}
|
||||
# But then after the first one, we should only add the delta text to content
|
||||
STREAMING_4 = {
|
||||
"previous_text": "Here is my thought",
|
||||
"current_text": "Here is my thought process:",
|
||||
"delta_text": " process:",
|
||||
"reasoning": None,
|
||||
"content": None,
|
||||
}
|
||||
# Reasoning started successfully; parse reasoning content
|
||||
STREAMING_5 = {
|
||||
"previous_text": "Here is my thought process:",
|
||||
"current_text": "Here is my thought process: foo",
|
||||
"delta_text": " foo",
|
||||
"reasoning": " foo",
|
||||
"content": None,
|
||||
}
|
||||
# Response special sequence has started, but not finished.
|
||||
STREAMING_6 = {
|
||||
"previous_text": "Here is my thought process: foo",
|
||||
"current_text": "Here is my thought process: foo Here is",
|
||||
"delta_text": " Here is",
|
||||
"reasoning": " ",
|
||||
"content": None,
|
||||
}
|
||||
# Response special sequence started, but was broken; the reasoning
|
||||
# content should be the content that was previously unused.
|
||||
STREAMING_7 = {
|
||||
"previous_text": "Here is my thought process: foo Here is",
|
||||
"current_text": "Here is my thought process: foo Here is Here",
|
||||
"delta_text": " Here",
|
||||
"reasoning": "Here is ",
|
||||
"content": None,
|
||||
}
|
||||
# Response special sequence is ongoing
|
||||
STREAMING_8 = {
|
||||
"previous_text": "Here is my thought process: foo Here is my response:",
|
||||
"current_text": "Here is my thought process: foo Here is my response: bar",
|
||||
"delta_text": " bar",
|
||||
"reasoning": None,
|
||||
"content": " bar",
|
||||
}
|
||||
# The delta text has everything; we should be able to correctly parse both
|
||||
STREAMING_9 = {
|
||||
"previous_text": None,
|
||||
"current_text": "Here is my thought process: foo Here is my response: bar",
|
||||
"delta_text": "Here is my thought process: foo Here is my response: bar",
|
||||
"reasoning": " foo ",
|
||||
"content": " bar",
|
||||
}
|
||||
## The Response is ongoing, and the delta mixes reasoning content / content
|
||||
STREAMING_10 = {
|
||||
"previous_text": "Here is my thought process: foo",
|
||||
"current_text": "Here is my thought process: foo bar Here is my response: baz",
|
||||
"delta_text": " bar Here is my response: baz",
|
||||
"reasoning": " bar ",
|
||||
"content": " baz",
|
||||
}
|
||||
# The delta text starts a new substring that might be a response special seq
|
||||
STREAMING_11 = {
|
||||
"previous_text": "Here is my thought process: This is a reasoning section ",
|
||||
"current_text": "Here is my thought process: This is a reasoning section Here",
|
||||
"delta_text": "Here",
|
||||
"reasoning": None,
|
||||
"content": None,
|
||||
}
|
||||
# The delta text is finishing the response special seq
|
||||
STREAMING_12 = {
|
||||
"previous_text": "Here is my thought process: foo Here is my response",
|
||||
"current_text": "Here is my thought process: foo Here is my response:",
|
||||
"delta_text": ":",
|
||||
"reasoning": None,
|
||||
"content": None,
|
||||
}
|
||||
STREAMING_13 = {
|
||||
"previous_text": "Here is my thought process: foo Here",
|
||||
"current_text": "Here is my thought process: foo Here was",
|
||||
"delta_text": " was",
|
||||
"reasoning": "Here was",
|
||||
"content": None,
|
||||
}
|
||||
|
||||
# Streaming sub-cases for the granite parser, one pytest.param per single
# delta step; the id describes the parser state being exercised.
STREAMING_SUBCASES = [
    pytest.param(
        STREAMING_1,
        id="Starting reasoning special sequence",
    ),
    pytest.param(
        STREAMING_2,
        id="Unexpected start reasoning sequence",
    ),
    pytest.param(
        STREAMING_3,
        id="Continuing unexpected start reasoning sequence",
    ),
    pytest.param(
        STREAMING_4,
        id="Only start reasoning sequence and nothing else",
    ),
    pytest.param(
        STREAMING_5,
        id="Reasoning content has started",
    ),
    pytest.param(
        STREAMING_6,
        id="Response special sequence has started",
    ),
    pytest.param(
        STREAMING_7,
        id="Response special sequence reset",
    ),
    pytest.param(
        STREAMING_8,
        id="Response text has started",
    ),
    pytest.param(
        STREAMING_9,
        id="Delta contains everything",
    ),
    pytest.param(
        STREAMING_10,
        id="Delta contains some reasoning and response",
    ),
    pytest.param(
        STREAMING_11,
        id="Delta starts response sequence",
    ),
    pytest.param(
        STREAMING_12,
        id="Delta finishes response sequence",
    ),
    pytest.param(
        STREAMING_13,
        # Fixed typo: "responise" -> "response".
        id="Delta breaks potential response sequence",
    ),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("param_dict", STREAMING_SUBCASES)
def test_streaming_subcases(param_dict):
    """Exercise extract_reasoning_streaming on a single delta step."""
    # Encode all three text views into token ids; previous_text may be None,
    # in which case the previous ids are simply empty.
    prev_text = param_dict["previous_text"]
    previous_token_ids = tokenizer.encode(prev_text) if prev_text is not None else []
    current_token_ids = tokenizer.encode(param_dict["current_text"])
    delta_token_ids = tokenizer.encode(param_dict["delta_text"])

    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(tokenizer)

    response = parser.extract_reasoning_streaming(
        previous_text=prev_text,
        current_text=param_dict["current_text"],
        delta_text=param_dict["delta_text"],
        previous_token_ids=previous_token_ids,
        current_token_ids=current_token_ids,
        delta_token_ids=delta_token_ids,
    )

    # Streaming currently expects at least one of reasoning / content; when
    # neither is expected the parser must return None for this step.
    if param_dict["reasoning"] is None and param_dict["content"] is None:
        assert response is None
    else:
        assert isinstance(response, DeltaMessage)
        assert response.reasoning == param_dict["reasoning"]
        assert response.content == param_dict["content"]
|
||||
188
tests/reasoning/test_holo2_reasoning_parser.py
Normal file
188
tests/reasoning/test_holo2_reasoning_parser.py
Normal file
@@ -0,0 +1,188 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
|
||||
from vllm.reasoning.holo2_reasoning_parser import Holo2ReasoningParser
|
||||
from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser
|
||||
|
||||
REASONING_MODEL_NAME = "HCompany/Holo2-4B"
|
||||
|
||||
|
||||
@pytest.fixture(scope="module")
|
||||
def tokenizer():
|
||||
return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "thinking,expected_parser_type",
    [
        (True, DeepSeekR1ReasoningParser),
        (False, IdentityReasoningParser),
    ],
)
def test_parser_selection(tokenizer, thinking, expected_parser_type):
    """The holo2 wrapper picks its delegate based on the `thinking` kwarg."""
    parser = Holo2ReasoningParser(
        tokenizer,
        chat_template_kwargs={"thinking": thinking},
    )
    assert isinstance(parser._parser, expected_parser_type)
|
||||
|
||||
|
||||
def test_holo2_default_parser_is_deepseekr1(tokenizer):
    """Without chat_template_kwargs the DeepSeek-R1 delegate is selected."""
    parser = Holo2ReasoningParser(tokenizer)
    assert isinstance(parser._parser, DeepSeekR1ReasoningParser)
|
||||
|
||||
|
||||
def test_holo2_supports_structured_output(tokenizer):
    """is_reasoning_end must work when constructed without kwargs.

    The structured output manager instantiates the reasoning parser without
    chat_template_kwargs and uses is_reasoning_end to decide when the
    reasoning section ended before applying the grammar, so that code path
    is exercised here deliberately without passing chat_template_kwargs.
    """
    parser = Holo2ReasoningParser(tokenizer)

    end_token_id = tokenizer.encode("</think>", add_special_tokens=False)[0]

    prefix = [1, 2, 4]
    assert parser.is_reasoning_end(prefix + [end_token_id])
    assert not parser.is_reasoning_end(prefix)
    assert parser.is_reasoning_end(prefix + [end_token_id, 5])
|
||||
|
||||
|
||||
# thinking is True, non-streaming
|
||||
WITH_THINK = {
|
||||
"output": "This is a reasoning section</think>This is the rest",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
}
|
||||
# thinking is True, streaming
|
||||
WITH_THINK_STREAM = {
|
||||
"output": "This is a reasoning section</think>This is the rest",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
}
|
||||
# thinking is False, non-streaming
|
||||
THINKING_DISABLED = {
|
||||
"output": "This is the rest",
|
||||
"reasoning": None,
|
||||
"content": "This is the rest",
|
||||
}
|
||||
# thinking is False, streaming
|
||||
THINKING_DISABLED_STREAM = {
|
||||
"output": "This is the rest",
|
||||
"reasoning": None,
|
||||
"content": "This is the rest",
|
||||
}
|
||||
# thinking is False but the model output </think>, non-streaming
|
||||
THINKING_DISABLED_WITH_CLOSE_TAG = {
|
||||
"output": "</think>This is the rest",
|
||||
"reasoning": None,
|
||||
"content": "</think>This is the rest",
|
||||
}
|
||||
# thinking is False but the model output </think>, streaming
|
||||
THINKING_DISABLED_WITH_CLOSE_TAG_STREAM = {
|
||||
"output": "some text</think>This is the rest",
|
||||
"reasoning": None,
|
||||
"content": "some text</think>This is the rest",
|
||||
}
|
||||
COMPLETE_REASONING = {
|
||||
"output": "This is a reasoning section</think>",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": None,
|
||||
}
|
||||
|
||||
TEST_CASES = [
|
||||
pytest.param(
|
||||
False,
|
||||
WITH_THINK,
|
||||
None,
|
||||
id="with_think",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
WITH_THINK_STREAM,
|
||||
None,
|
||||
id="with_think_stream",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
WITH_THINK,
|
||||
{"thinking": True},
|
||||
id="with_think_enabled",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
WITH_THINK_STREAM,
|
||||
{"thinking": True},
|
||||
id="with_think_stream_enabled",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
THINKING_DISABLED,
|
||||
{"thinking": False},
|
||||
id="thinking_disabled",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
THINKING_DISABLED_STREAM,
|
||||
{"thinking": False},
|
||||
id="thinking_disabled_stream",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
THINKING_DISABLED_WITH_CLOSE_TAG,
|
||||
{"thinking": False},
|
||||
id="thinking_disabled_with_close_tag",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
THINKING_DISABLED_WITH_CLOSE_TAG_STREAM,
|
||||
{"thinking": False},
|
||||
id="thinking_disabled_with_close_tag_stream",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
COMPLETE_REASONING,
|
||||
None,
|
||||
id="complete_reasoning",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
COMPLETE_REASONING,
|
||||
None,
|
||||
id="complete_reasoning_stream",
|
||||
),
|
||||
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict, chat_template_kwargs", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    chat_template_kwargs: dict | None,
    tokenizer,
):
    """Run holo2 extraction with and without chat_template_kwargs."""
    tokens = tokenizer.tokenize(param_dict["output"])
    # Re-detokenize each token so the helper sees plain text pieces.
    token_strings: list[str] = []
    for tok in tokens:
        token_strings.append(tokenizer.convert_tokens_to_string([tok]))

    parser_cls = ReasoningParserManager.get_reasoning_parser("holo2")
    parser: ReasoningParser = parser_cls(
        tokenizer,
        chat_template_kwargs=chat_template_kwargs,
    )

    reasoning, content = run_reasoning_extraction(
        parser, token_strings, streaming=streaming
    )

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
|
||||
168
tests/reasoning/test_hunyuan_reasoning_parser.py
Normal file
168
tests/reasoning/test_hunyuan_reasoning_parser.py
Normal file
@@ -0,0 +1,168 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
|
||||
parser_name = "hunyuan_a13b"
|
||||
START_REASONING = "<think>\n"
|
||||
START_RESPONSE = "\n</think>\n<answer>\n"
|
||||
END_RESPONSE = "\n</answer>"
|
||||
|
||||
NO_REASONING_QUICK_THROUGHT = {
|
||||
"output": f"{START_REASONING}{START_RESPONSE}This is the rest{END_RESPONSE}", # noqa: E501
|
||||
"reasoning": None,
|
||||
"content": "This is the rest",
|
||||
}
|
||||
|
||||
SIMPLE_REASONING = {
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest{END_RESPONSE}", # noqa: E501
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
}
|
||||
COMPLETE_REASONING = {
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": None,
|
||||
}
|
||||
|
||||
COMPLETE_REASONING_WITH_SYMBOL = {
|
||||
"output": f"{START_REASONING}This is a reasoning section!{START_RESPONSE}",
|
||||
"reasoning": "This is a reasoning section!",
|
||||
"content": None,
|
||||
}
|
||||
NO_REASONING = {
|
||||
"output": "This is content",
|
||||
"reasoning": None,
|
||||
"content": "This is content",
|
||||
}
|
||||
MULTIPLE_LINES = {
|
||||
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"reasoning": "This\nThat",
|
||||
"content": "This is the rest\nThat",
|
||||
}
|
||||
REASONING_WITH_THINK = {
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest", # noqa: E501
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": "This is the rest",
|
||||
}
|
||||
COMPLETE_REASONING_WITH_THINK = {
|
||||
"output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
|
||||
"reasoning": "This is a reasoning section",
|
||||
"content": None,
|
||||
}
|
||||
MULTIPLE_LINES_WITH_THINK = {
|
||||
"output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
|
||||
"reasoning": "This\nThat",
|
||||
"content": "This is the rest\nThat",
|
||||
}
|
||||
|
||||
TEST_CASES = [
|
||||
pytest.param(
|
||||
False,
|
||||
SIMPLE_REASONING,
|
||||
id="simple_reasoning",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
COMPLETE_REASONING,
|
||||
id="complete_reasoning",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
COMPLETE_REASONING_WITH_SYMBOL,
|
||||
id="complete_reasoning_with_symbol",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
NO_REASONING,
|
||||
id="no_reasoning",
|
||||
),
|
||||
pytest.param(False, NO_REASONING_QUICK_THROUGHT, id="no_reasoning_quick"),
|
||||
pytest.param(
|
||||
False,
|
||||
MULTIPLE_LINES,
|
||||
id="multiple_lines",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
REASONING_WITH_THINK,
|
||||
id="reasoning_with_think",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
COMPLETE_REASONING_WITH_THINK,
|
||||
id="complete_reasoning_with_think",
|
||||
),
|
||||
pytest.param(
|
||||
False,
|
||||
MULTIPLE_LINES_WITH_THINK,
|
||||
id="multiple_lines_with_think",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
SIMPLE_REASONING,
|
||||
id="simple_reasoning_streaming",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
COMPLETE_REASONING,
|
||||
id="complete_reasoning_streaming",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
NO_REASONING,
|
||||
id="no_reasoning_streaming",
|
||||
),
|
||||
pytest.param(True, NO_REASONING_QUICK_THROUGHT, id="no_reasoning_quick_stream"),
|
||||
pytest.param(
|
||||
True,
|
||||
MULTIPLE_LINES,
|
||||
id="multiple_lines_streaming",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
REASONING_WITH_THINK,
|
||||
id="reasoning_with_think_streaming",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
COMPLETE_REASONING_WITH_THINK,
|
||||
id="complete_reasoning_with_think_streaming",
|
||||
),
|
||||
pytest.param(
|
||||
True,
|
||||
MULTIPLE_LINES_WITH_THINK,
|
||||
id="multiple_lines_with_think_streaming",
|
||||
),
|
||||
]
|
||||
|
||||
# Global tokenizer initialization to avoid repeated loading
|
||||
tokenizer = AutoTokenizer.from_pretrained(
|
||||
"tencent/Hunyuan-A13B-Instruct", trust_remote_code=True
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
):
    """Check reasoning/content extraction for one case, in one mode.

    Each case dict supplies the raw model "output" plus the expected
    "reasoning" and "content" splits.
    """
    # Decode token-by-token so streaming mode sees one delta per token.
    token_deltas: list[str] = [
        tokenizer.convert_tokens_to_string([tok])
        for tok in tokenizer.tokenize(param_dict["output"])
    ]

    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(tokenizer)

    reasoning, content = run_reasoning_extraction(
        parser, token_deltas, streaming=streaming
    )

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
|
||||
195
tests/reasoning/test_minimax_m2_append_reasoning_parser.py
Normal file
195
tests/reasoning/test_minimax_m2_append_reasoning_parser.py
Normal file
@@ -0,0 +1,195 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
|
||||
# Registry name of the parser under test and its end-of-thinking marker.
parser_name = "minimax_m2_append_think"
end_token = "</think>"

# MiniMax M2 model path (HF hub id) used to build the tokenizer fixture.
REASONING_MODEL_NAME = "MiniMaxAI/MiniMax-M2"


@pytest.fixture(scope="module")
def minimax_m2_tokenizer():
    """Module-scoped tokenizer so the model files are loaded only once."""
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
|
||||
|
||||
|
||||
# =============================================================================
# MiniMaxM2AppendThinkReasoningParser behavior:
# - Prepends <think> to the beginning of the output
# - Does NOT separate reasoning and content
# - Returns everything as content (with <think> prepended)
# - reasoning is always None
#
# This parser is used when you want to keep the raw output with <think> added
#
# Case-dict keys: "output" is the raw model text; "reasoning"/"content" are
# the expected extraction results; "is_reasoning_end" is the expected flag
# for parser.is_reasoning_end over the token ids.
# =============================================================================

# Case: simple output with end token
SIMPLE_OUTPUT = {
    "output": "This is reasoning</think>This is response",
    "reasoning": None,
    "content": "<think>This is reasoning</think>This is response",
    "is_reasoning_end": True,
}

# Case: output without end token (reasoning in progress)
NO_END_TOKEN = {
    "output": "This is reasoning in progress",
    "reasoning": None,
    "content": "<think>This is reasoning in progress",
    "is_reasoning_end": False,
}

# Case: only end token
ONLY_END_TOKEN = {
    "output": "</think>This is response",
    "reasoning": None,
    "content": "<think></think>This is response",
    "is_reasoning_end": True,
}

# Case: multiple lines
MULTIPLE_LINES = {
    "output": "Line 1\nLine 2</think>Response 1\nResponse 2",
    "reasoning": None,
    "content": "<think>Line 1\nLine 2</think>Response 1\nResponse 2",
    "is_reasoning_end": True,
}

# Case: empty output (non-streaming prepends <think>)
EMPTY = {
    "output": "",
    "reasoning": None,
    "content": "<think>",
    "is_reasoning_end": False,
}

# Case: empty output streaming (no tokens = no output)
EMPTY_STREAMING = {
    "output": "",
    "reasoning": None,
    "content": None,
    "is_reasoning_end": False,
}

# Case: special characters
SPECIAL_CHARS = {
    "output": "Let me think... 1+1=2</think>Yes!",
    "reasoning": None,
    "content": "<think>Let me think... 1+1=2</think>Yes!",
    "is_reasoning_end": True,
}

# Case: code in output
CODE_OUTPUT = {
    "output": "```python\nprint('hi')\n```</think>Here's the code.",
    "reasoning": None,
    "content": "<think>```python\nprint('hi')\n```</think>Here's the code.",
    "is_reasoning_end": True,
}
|
||||
|
||||
# (streaming, case) pairs: every case runs in both non-streaming and
# streaming mode under a distinct pytest id (except EMPTY, whose streaming
# expectation differs and so has its own dict).
TEST_CASES = [
    pytest.param(False, SIMPLE_OUTPUT, id="simple_output"),
    pytest.param(True, SIMPLE_OUTPUT, id="simple_output_streaming"),
    pytest.param(False, NO_END_TOKEN, id="no_end_token"),
    pytest.param(True, NO_END_TOKEN, id="no_end_token_streaming"),
    pytest.param(False, ONLY_END_TOKEN, id="only_end_token"),
    pytest.param(True, ONLY_END_TOKEN, id="only_end_token_streaming"),
    pytest.param(False, MULTIPLE_LINES, id="multiple_lines"),
    pytest.param(True, MULTIPLE_LINES, id="multiple_lines_streaming"),
    pytest.param(False, EMPTY, id="empty"),
    pytest.param(True, EMPTY_STREAMING, id="empty_streaming"),
    pytest.param(False, SPECIAL_CHARS, id="special_chars"),
    pytest.param(True, SPECIAL_CHARS, id="special_chars_streaming"),
    pytest.param(False, CODE_OUTPUT, id="code_output"),
    pytest.param(True, CODE_OUTPUT, id="code_output_streaming"),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    minimax_m2_tokenizer,
):
    """Append-think parser keeps everything as content with <think> prepended."""
    raw_tokens = minimax_m2_tokenizer.tokenize(param_dict["output"])
    # One decoded string per token so streaming mode sees realistic deltas.
    output_tokens: list[str] = [
        minimax_m2_tokenizer.convert_tokens_to_string([tok]) for tok in raw_tokens
    ]

    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(minimax_m2_tokenizer)

    reasoning, content = run_reasoning_extraction(
        parser, output_tokens, streaming=streaming
    )

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]

    # The parser must also report whether reasoning ended for the id sequence.
    output_ids = minimax_m2_tokenizer.convert_tokens_to_ids(raw_tokens)
    assert parser.is_reasoning_end(output_ids) == param_dict["is_reasoning_end"]
|
||||
230
tests/reasoning/test_minimax_m2_reasoning_parser.py
Normal file
230
tests/reasoning/test_minimax_m2_reasoning_parser.py
Normal file
@@ -0,0 +1,230 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
|
||||
# Registry name of the parser under test and its end-of-thinking marker.
parser_name = "minimax_m2"
end_token = "</think>"

# MiniMax M2 model path (HF hub id) used to build the tokenizer fixture.
REASONING_MODEL_NAME = "MiniMaxAI/MiniMax-M2"


@pytest.fixture(scope="module")
def minimax_m2_tokenizer():
    """Module-scoped tokenizer so the model files are loaded only once."""
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
|
||||
|
||||
|
||||
# =============================================================================
# MiniMax M2 specific behavior:
# - Model does NOT generate <think> start token
# - Model only generates </think> end token
# - All content before </think> is reasoning
# - All content after </think> is the actual response (content)
#
# Case-dict keys: "output" is the raw model text; "reasoning"/"content" are
# the expected extraction results; "is_reasoning_end" is the expected flag
# for parser.is_reasoning_end over the token ids.
# =============================================================================

# Case: reasoning + end token + content (typical case)
SIMPLE_REASONING = {
    "output": "This is a reasoning section</think>This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
}

# Case: reasoning + end token only (no content after)
COMPLETE_REASONING = {
    "output": "This is a reasoning section</think>",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": True,
}

# Case: no end token yet (streaming in progress, all is reasoning)
NO_END_TOKEN = {
    "output": "This is reasoning in progress",
    "reasoning": "This is reasoning in progress",
    "content": None,
    "is_reasoning_end": False,
}

# Case: multiple lines of reasoning
MULTIPLE_LINES = {
    "output": "First line\nSecond line</think>Response first line\nResponse second",
    "reasoning": "First line\nSecond line",
    "content": "Response first line\nResponse second",
    "is_reasoning_end": True,
}

# Case: only end token (empty reasoning, immediate response)
SHORTEST_REASONING_NO_STREAMING = {
    "output": "</think>This is the response",
    "reasoning": "",
    "content": "This is the response",
    "is_reasoning_end": True,
}

# Case: only end token streaming (reasoning is None because it's just the token)
SHORTEST_REASONING_STREAMING = {
    "output": "</think>This is the response",
    "reasoning": None,
    "content": "This is the response",
    "is_reasoning_end": True,
}

# Case: empty output
EMPTY = {
    "output": "",
    "reasoning": "",
    "content": None,
    "is_reasoning_end": False,
}

# Case: empty streaming
EMPTY_STREAMING = {
    "output": "",
    "reasoning": None,
    "content": None,
    "is_reasoning_end": False,
}

# Case: long reasoning with special characters
SPECIAL_CHARS = {
    "output": "Let me think... 1+1=2, right?</think>Yes, 1+1=2.",
    "reasoning": "Let me think... 1+1=2, right?",
    "content": "Yes, 1+1=2.",
    "is_reasoning_end": True,
}

# Case: reasoning with code blocks
CODE_IN_REASONING = {
    "output": "```python\nprint('hello')\n```</think>Here is the code.",
    "reasoning": "```python\nprint('hello')\n```",
    "content": "Here is the code.",
    "is_reasoning_end": True,
}
|
||||
|
||||
# Core cases: no start token (MiniMax M2 actual behavior).
# Each case runs non-streaming and streaming; the "shortest"/"empty" cases
# use distinct dicts per mode because the expected reasoning differs.
TEST_CASES = [
    pytest.param(False, SIMPLE_REASONING, id="simple_reasoning"),
    pytest.param(True, SIMPLE_REASONING, id="simple_reasoning_streaming"),
    pytest.param(False, COMPLETE_REASONING, id="complete_reasoning"),
    pytest.param(True, COMPLETE_REASONING, id="complete_reasoning_streaming"),
    pytest.param(False, NO_END_TOKEN, id="no_end_token"),
    pytest.param(True, NO_END_TOKEN, id="no_end_token_streaming"),
    pytest.param(False, MULTIPLE_LINES, id="multiple_lines"),
    pytest.param(True, MULTIPLE_LINES, id="multiple_lines_streaming"),
    pytest.param(False, SHORTEST_REASONING_NO_STREAMING, id="shortest_reasoning"),
    pytest.param(True, SHORTEST_REASONING_STREAMING, id="shortest_reasoning_streaming"),
    pytest.param(False, EMPTY, id="empty"),
    pytest.param(True, EMPTY_STREAMING, id="empty_streaming"),
    pytest.param(False, SPECIAL_CHARS, id="special_chars"),
    pytest.param(True, SPECIAL_CHARS, id="special_chars_streaming"),
    pytest.param(False, CODE_IN_REASONING, id="code_in_reasoning"),
    pytest.param(True, CODE_IN_REASONING, id="code_in_reasoning_streaming"),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    minimax_m2_tokenizer,
):
    """End-to-end check of the minimax_m2 reasoning parser.

    Verifies reasoning/content extraction (streaming and non-streaming),
    ``is_reasoning_end``, and ``extract_content_ids`` against the expected
    values declared in each case dict.
    """
    output = minimax_m2_tokenizer.tokenize(param_dict["output"])
    # decode everything to tokens: one string per token so streaming mode
    # sees realistic per-token deltas
    output_tokens: list[str] = [
        minimax_m2_tokenizer.convert_tokens_to_string([token]) for token in output
    ]
    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        minimax_m2_tokenizer
    )

    reasoning, content = run_reasoning_extraction(
        parser, output_tokens, streaming=streaming
    )

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]

    # Test is_reasoning_end (operates on token ids, not strings)
    output_ids = minimax_m2_tokenizer.convert_tokens_to_ids(output)
    is_reasoning_end = parser.is_reasoning_end(output_ids)
    assert is_reasoning_end == param_dict["is_reasoning_end"]

    # Test extract_content
    if param_dict["content"] is not None:
        content = parser.extract_content_ids(output_ids)
        assert content == minimax_m2_tokenizer.convert_tokens_to_ids(
            minimax_m2_tokenizer.tokenize(param_dict["content"])
        )
    else:
        # Bug fix: the original passed `output` (token *strings*) here, so
        # the parser could never match the end-token id and `== []` passed
        # vacuously. Pass the token ids, matching the branch above.
        content = parser.extract_content_ids(output_ids)
        assert content == []
|
||||
348
tests/reasoning/test_mistral_reasoning_parser.py
Normal file
348
tests/reasoning/test_mistral_reasoning_parser.py
Normal file
@@ -0,0 +1,348 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction_mistral
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
from vllm.tokenizers.mistral import MistralTokenizer
|
||||
|
||||
# Registry name of the parser under test.
parser_name = "mistral"


@pytest.fixture(scope="module")
def mistral_tokenizer():
    """Module-scoped Magistral tokenizer so the model files load only once."""
    mistral_tokenizer = MistralTokenizer.from_pretrained(
        "mistralai/Magistral-Small-2509"
    )
    return mistral_tokenizer
|
||||
|
||||
|
||||
# Case-dict keys: "output" is the raw model text (with literal [THINK] /
# [/THINK] markers that the test replaces with the tokenizer's special ids);
# "reasoning"/"content" are the expected extraction results;
# "is_reasoning_end" is the expected flag for parser.is_reasoning_end.
# "INVALID_*" cases have an end marker without a matching [THINK] start.

# End marker with no [THINK] start: nothing counts as reasoning.
INVALID_SIMPLE_REASONING = {
    "output": "This is a reasoning section[/THINK]This is the rest",
    "reasoning": None,
    "content": "This is a reasoning sectionThis is the rest",
    "is_reasoning_end": False,
}
# End marker with no start and no trailing content.
INVALID_COMPLETE_REASONING = {
    "output": "This is a reasoning section[/THINK]",
    "reasoning": None,
    "content": "This is a reasoning section",
    "is_reasoning_end": False,
}
# Open [THINK] with no end: everything after it is reasoning.
NO_CONTENT = {
    "output": "[THINK]This is reasoning",
    "reasoning": "This is reasoning",
    "content": None,
    "is_reasoning_end": False,
}
# No markers at all: plain content.
NO_REASONING = {
    "output": "This is content",
    "reasoning": None,
    "content": "This is content",
    "is_reasoning_end": False,
}
NO_REASONING_STREAMING = {
    "output": "This is a reasoning section",
    "reasoning": None,
    "content": "This is a reasoning section",
    "is_reasoning_end": False,
}
INVALID_MULTIPLE_LINES = {
    "output": "This\nThat[/THINK]This is the rest\nThat",
    "reasoning": None,
    "content": "This\nThatThis is the rest\nThat",
    "is_reasoning_end": False,
}
INVALID_SHORTEST_REASONING_NO_STREAMING = {
    "output": "[/THINK]This is the rest",
    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": False,
}
INVALID_SHORTEST_REASONING = {
    "output": "[/THINK]This is the rest",
    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": False,
}
# Well-formed [THINK]...[/THINK] pair: reasoning and content split cleanly.
REASONING_WITH_THINK = {
    "output": "[THINK]This is a reasoning section[/THINK]This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
COMPLETE_REASONING_WITH_THINK = {
    "output": "[THINK]This is a reasoning section[/THINK]",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": True,
}
MULTIPLE_LINES_WITH_THINK = {
    "output": "[THINK]This\nThat[/THINK]This is the rest\nThat",
    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
    "is_reasoning_end": True,
}
INVALID_SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
    "output": "[/THINK]This is the rest",
    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": False,
}
INVALID_SHORTEST_REASONING_WITH_THINK = {
    "output": "[/THINK]This is the rest",
    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": False,
}
# [THINK] never closed: reasoning in progress.
THINK_NO_END = {
    "output": "[THINK]This is a reasoning section",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": False,
}
EMPTY = {
    "output": "",
    "reasoning": None,
    "content": "",
    "is_reasoning_end": False,
}
EMPTY_STREAMING = {
    "output": "",
    "reasoning": None,
    "content": None,
    "is_reasoning_end": False,
}
# Content precedes [THINK]: leading text stays in content.
NEW_LINE = {
    "output": "Before\n[THINK]This is a reasoning section[/THINK]\nThis is the rest",
    "reasoning": "This is a reasoning section",
    "content": "Before\n\nThis is the rest",
    "is_reasoning_end": True,
}
NEW_LINE_STREAMING = {
    "output": "Before\n[THINK]This is a reasoning section[/THINK]\nThis is the rest",
    "reasoning": "This is a reasoning section",
    "content": "Before\n\nThis is the rest",
    "is_reasoning_end": True,
}
|
||||
|
||||
# (streaming, case) pairs. NOTE(review): the "invalid_shortest" id is bound
# to streaming=True and "invalid_shortest_streaming" to streaming=False in
# the original; ids preserved verbatim here.
TEST_CASES = [
    pytest.param(False, INVALID_SIMPLE_REASONING, id="invalid_simple_reasoning"),
    pytest.param(True, INVALID_SIMPLE_REASONING, id="invalid_simple_reasoning_streaming"),
    pytest.param(False, INVALID_COMPLETE_REASONING, id="invalid_complete_reasoning"),
    pytest.param(True, INVALID_COMPLETE_REASONING, id="invalid_complete_reasoning_streaming"),
    pytest.param(False, NO_CONTENT, id="no_content"),
    pytest.param(False, NO_REASONING, id="no_reasoning"),
    pytest.param(True, NO_REASONING_STREAMING, id="no_reasoning_token_streaming"),
    pytest.param(False, INVALID_MULTIPLE_LINES, id="invalid_multiple_lines"),
    pytest.param(True, INVALID_MULTIPLE_LINES, id="invalid_multiple_lines_streaming"),
    pytest.param(True, INVALID_SHORTEST_REASONING, id="invalid_shortest"),
    pytest.param(False, INVALID_SHORTEST_REASONING_NO_STREAMING, id="invalid_shortest_streaming"),
    pytest.param(False, REASONING_WITH_THINK, id="reasoning_with_think"),
    pytest.param(True, REASONING_WITH_THINK, id="reasoning_with_think_streaming"),
    pytest.param(False, COMPLETE_REASONING_WITH_THINK, id="complete_reasoning_with_think"),
    pytest.param(True, COMPLETE_REASONING_WITH_THINK, id="complete_reasoning_with_think_streaming"),
    pytest.param(False, MULTIPLE_LINES_WITH_THINK, id="multiple_lines_with_think"),
    pytest.param(True, MULTIPLE_LINES_WITH_THINK, id="multiple_lines_with_think_streaming"),
    pytest.param(False, INVALID_SHORTEST_REASONING_NO_STREAMING_WITH_THINK, id="invalid_shortest_with_think"),
    pytest.param(True, INVALID_SHORTEST_REASONING_WITH_THINK, id="invalid_shortest_with_think_streaming"),
    pytest.param(False, THINK_NO_END, id="think_no_end"),
    pytest.param(True, THINK_NO_END, id="think_no_end_streaming"),
    pytest.param(False, EMPTY, id="empty"),
    pytest.param(True, EMPTY_STREAMING, id="empty_streaming"),
    pytest.param(False, NEW_LINE, id="new_line"),
    pytest.param(True, NEW_LINE_STREAMING, id="new_line_streaming"),
]
|
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_mistral_reasoning(
    streaming: bool,
    param_dict: dict,
    mistral_tokenizer: MistralTokenizer,
):
    """Exercise the mistral reasoning parser against one case dict.

    Builds the token-id sequence by hand, substituting the tokenizer's
    special BEGIN_THINK/END_THINK ids for the literal [THINK]/[/THINK]
    markers in the case's "output" string, then checks reasoning/content
    extraction, is_reasoning_end, and extract_content_ids.
    """
    output = param_dict["output"]

    # Locate the textual markers so we can splice in the special-token ids.
    index_think = output.find("[THINK]")
    len_think = len("[THINK]")
    index_end_think = output.find("[/THINK]")
    len_end_think = len("[/THINK]")

    # encode everything to tokens ids
    output_tokens = []
    if index_think != -1:
        # Text before [THINK] (if any), then the BEGIN_THINK special id.
        output_before_think = output[:index_think]
        output_tokens += mistral_tokenizer.tokenizer.encode(
            output_before_think, False, False
        )
        output_tokens += [mistral_tokenizer.instruct.BEGIN_THINK]

        if index_end_think != -1:
            # Reasoning body, then END_THINK id, then trailing content.
            output_middle = output[index_think + len_think : index_end_think]
            output_after_think = output[index_end_think + len_end_think :]
            output_tokens += mistral_tokenizer.tokenizer.encode(
                output_middle, False, False
            )
            output_tokens += [mistral_tokenizer.instruct.END_THINK]
            output_tokens += mistral_tokenizer.tokenizer.encode(
                output_after_think, False, False
            )
        else:
            # Unclosed [THINK]: everything after it is reasoning.
            output_middle = output[index_think + len_think :]
            output_tokens += mistral_tokenizer.tokenizer.encode(
                output_middle, False, False
            )
    elif index_end_think != -1:
        # [/THINK] without a preceding [THINK] (invalid cases).
        output_before_think = output[:index_end_think]
        output_after_think = output[index_end_think + len_end_think :]
        output_tokens += mistral_tokenizer.tokenizer.encode(
            output_before_think, False, False
        )
        output_tokens += [mistral_tokenizer.instruct.END_THINK]
        output_tokens += mistral_tokenizer.tokenizer.encode(
            output_after_think, False, False
        )
    else:
        # No markers at all: plain content.
        output_tokens += mistral_tokenizer.tokenizer.encode(output, False, False)

    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        mistral_tokenizer
    )

    reasoning, content = run_reasoning_extraction_mistral(
        parser, output_tokens, streaming=streaming
    )

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]

    # Test is_reasoning_end
    is_reasoning_end = parser.is_reasoning_end(output_tokens)
    assert is_reasoning_end == param_dict["is_reasoning_end"]

    # Test extract_content
    if param_dict["content"] is not None:
        # Handle the case where there are tokens outputted before Thinking.
        # This should not occur if the model is well trained and prompted.
        if "[THINK]" in param_dict["output"] and not param_dict["output"].startswith(
            "[THINK]"
        ):
            before_content = param_dict["output"].split("[THINK]")[0]
            before_token_ids = mistral_tokenizer.tokenizer.encode(
                before_content, bos=False, eos=False
            )
            left_to_encode = param_dict["content"][len(before_content) :]
        # Normal situation.
        else:
            before_token_ids = []
            left_to_encode = param_dict["content"]

        content_tokens = parser.extract_content_ids(output_tokens)
        # Expected ids = ids of any pre-[THINK] text + ids of the remainder.
        expected_token_ids = before_token_ids + mistral_tokenizer.tokenizer.encode(
            left_to_encode, bos=False, eos=False
        )
        assert content_tokens == expected_token_ids
    else:
        content = parser.extract_content_ids(output_tokens)
        assert content == []
|
||||
152
tests/reasoning/test_olmo3_reasoning_parser.py
Normal file
152
tests/reasoning/test_olmo3_reasoning_parser.py
Normal file
@@ -0,0 +1,152 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
|
||||
# Registry name of the parser under test and its reasoning delimiters.
parser_name = "olmo3"
START_REASONING = "<think>"
END_REASONING = "</think>"

# Case-dict keys: "output" is the raw model text; "reasoning"/"content" are
# the expected extraction results.

# Empty think section: no reasoning, content only.
NO_REASONING = {
    "output": f"{START_REASONING}{END_REASONING}No thoughts, head empty!",
    "reasoning": None,
    "content": "No thoughts, head empty!",
}

# A lone newline inside the think section still counts as reasoning.
NO_REASONING_WITH_NEWLINE = {
    "output": f"{START_REASONING}\n{END_REASONING}\n\nNo thoughts, head empty!",
    "reasoning": "\n",
    "content": "\n\nNo thoughts, head empty!",
}

SIMPLE_REASONING = {
    "output": f"{START_REASONING}This is a reasoning section{END_REASONING}This is the rest", # noqa: E501
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}

SIMPLE_REASONING_WITH_NEWLINE = {
    "output": f"{START_REASONING} Look!\n\nI'm thinking...{END_REASONING}\nThis is the rest", # noqa: E501
    "reasoning": " Look!\n\nI'm thinking...",
    "content": "\nThis is the rest",
}

SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES = {
    "output": f"{START_REASONING}\nLook!\nI'm thinking...\n\n{END_REASONING}\n\n\nThis is the rest", # noqa: E501
    "reasoning": "\nLook!\nI'm thinking...\n\n",
    "content": "\n\n\nThis is the rest",
}

# End tag without a start tag: everything is content.
NO_REASONING_ONLY_END_THINK = {
    "output": f"{END_REASONING}\n\nNo thoughts, head empty!",
    "reasoning": None,
    "content": "\n\nNo thoughts, head empty!",
}

# Text before a bare end tag is treated as reasoning.
REASONING_ONLY_END_THINK = {
    "output": f"The user is asking me not to think.{END_REASONING}No thoughts!",
    "reasoning": "The user is asking me not to think.",
    "content": "No thoughts!",
}
|
||||
|
||||
# (streaming, case) pairs.
# NOTE(review): SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES appears twice with
# streaming=True (under both a plain and a "_streaming" id) and never with
# streaming=False; the original marks the first occurrence "# enable
# streaming", so this is preserved — confirm whether the non-streaming mode
# was intentionally excluded.
TEST_CASES = [
    pytest.param(False, NO_REASONING, id="no_reasoning"),
    pytest.param(False, NO_REASONING_WITH_NEWLINE, id="no_reasoning_with_newline"),
    pytest.param(False, SIMPLE_REASONING, id="simple_reasoning"),
    pytest.param(False, SIMPLE_REASONING_WITH_NEWLINE, id="simple_reasoning_with_newline"),
    pytest.param(True, SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES, id="simple_reasoning_with_multiple_newlines"),
    pytest.param(False, NO_REASONING_ONLY_END_THINK, id="no_reasoning_only_end_think"),
    pytest.param(False, REASONING_ONLY_END_THINK, id="yes_reasoning_only_end_think"),
    pytest.param(True, NO_REASONING, id="no_reasoning_streaming"),
    pytest.param(True, NO_REASONING_WITH_NEWLINE, id="no_reasoning_with_newline_streaming"),
    pytest.param(True, SIMPLE_REASONING, id="simple_reasoning_streaming"),
    pytest.param(True, SIMPLE_REASONING_WITH_NEWLINE, id="simple_reasoning_with_newline_streaming"),
    pytest.param(True, SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES, id="simple_reasoning_with_multiple_newlines_streaming"),
    pytest.param(True, NO_REASONING_ONLY_END_THINK, id="no_reasoning_only_end_think_streaming"),
    pytest.param(True, REASONING_ONLY_END_THINK, id="yes_reasoning_only_end_think_streaming"),
]
|
||||
|
||||
# Global tokenizer initialization to avoid repeated loading across the
# parametrized cases (dolma2 is the tokenizer used by the Olmo3 models).
tokenizer = AutoTokenizer.from_pretrained("allenai/dolma2-tokenizer")
|
||||
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict[str, str],
):
    """Olmo3 parser splits reasoning from content in both modes."""
    raw_tokens = tokenizer.tokenize(param_dict["output"])

    # One decoded string per token so streaming mode sees realistic deltas.
    model_output: list[str] = [
        tokenizer.convert_tokens_to_string([tok]) for tok in raw_tokens
    ]

    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(tokenizer)

    reasoning, content = run_reasoning_extraction(
        reasoning_parser=parser, model_output=model_output, streaming=streaming
    )

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
|
||||
142
tests/reasoning/test_qwen3_reasoning_parser.py
Normal file
142
tests/reasoning/test_qwen3_reasoning_parser.py
Normal file
@@ -0,0 +1,142 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
|
||||
# Registry name of the parser under test and its reasoning delimiter tokens.
parser_name = "qwen3"
start_token = "<think>"
end_token = "</think>"

# Qwen3 model path (HF hub id) used to build the tokenizer fixture.
REASONING_MODEL_NAME = "Qwen/Qwen3-0.6B"


@pytest.fixture(scope="module")
def qwen3_tokenizer():
    """Module-scoped tokenizer so the model files are loaded only once."""
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
|
||||
|
||||
|
||||
# With <think></think>, non-streaming.
WITH_THINK = {
    "output": "<think>This is a reasoning section</think>This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}
# With <think></think>, streaming.
WITH_THINK_STREAM = {
    "output": "<think>This is a reasoning section</think>This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}
# Without <think></think>, non-streaming.
WITHOUT_THINK = {
    "output": "This is the rest",
    "reasoning": None,
    "content": "This is the rest",
}
# Without <think></think>, streaming.
WITHOUT_THINK_STREAM = {
    "output": "This is the rest",
    "reasoning": None,
    "content": "This is the rest",
}

# Reasoning with no trailing content after the closing tag.
COMPLETE_REASONING = {
    "output": "<think>This is a reasoning section</think>",
    "reasoning": "This is a reasoning section",
    "content": None,
}
MULTILINE_REASONING = {
    "output": "<think>This is a reasoning\nsection</think>This is the rest\nThat",
    "reasoning": "This is a reasoning\nsection",
    "content": "This is the rest\nThat",
}
# Unclosed <think>: the non-streaming path leaves the whole text,
# tag included, as content.
ONLY_OPEN_TAG = {
    "output": "<think>This is a reasoning section",
    "reasoning": None,
    "content": "<think>This is a reasoning section",
}

# Unclosed <think>: the streaming path treats everything after the
# tag as reasoning.
ONLY_OPEN_TAG_STREAM = {
    "output": "<think>This is a reasoning section",
    "reasoning": "This is a reasoning section",
    "content": None,
}
|
||||
# (streaming, case-dict) pairs consumed by test_reasoning; ids mirror the
# case-constant names.
TEST_CASES = [
    pytest.param(False, WITH_THINK, id="with_think"),
    pytest.param(True, WITH_THINK_STREAM, id="with_think_stream"),
    pytest.param(False, WITHOUT_THINK, id="without_think"),
    pytest.param(True, WITHOUT_THINK_STREAM, id="without_think_stream"),
    pytest.param(False, COMPLETE_REASONING, id="complete_reasoning"),
    pytest.param(True, COMPLETE_REASONING, id="complete_reasoning_stream"),
    pytest.param(False, MULTILINE_REASONING, id="multiline_reasoning"),
    pytest.param(True, MULTILINE_REASONING, id="multiline_reasoning_stream"),
    pytest.param(False, ONLY_OPEN_TAG, id="only_open_tag"),
    pytest.param(True, ONLY_OPEN_TAG_STREAM, id="only_open_tag_stream"),
]
|
||||
|
||||
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    qwen3_tokenizer,
):
    """Tokenize the case output, run extraction, and compare both fields."""
    # Re-decode each token individually so streaming sees realistic deltas.
    output_tokens: list[str] = [
        qwen3_tokenizer.convert_tokens_to_string([tok])
        for tok in qwen3_tokenizer.tokenize(param_dict["output"])
    ]

    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(qwen3_tokenizer)

    reasoning, content = run_reasoning_extraction(
        parser, output_tokens, streaming=streaming
    )

    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
||||
236
tests/reasoning/test_seedoss_reasoning_parser.py
Normal file
236
tests/reasoning/test_seedoss_reasoning_parser.py
Normal file
@@ -0,0 +1,236 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from typing import Any, cast
|
||||
|
||||
import pytest
|
||||
from transformers import AutoTokenizer
|
||||
|
||||
from tests.reasoning.utils import run_reasoning_extraction
|
||||
from vllm.reasoning import ReasoningParser, ReasoningParserManager
|
||||
|
||||
# Registry name of the parser under test.
parser_name = "seed_oss"
# Delimiters SeedOSS places around its reasoning section.
start_token = "<seed:think>"
end_token = "</seed:think>"

# Base tokenizer for the tests; the fixture below registers the SeedOSS
# tokens when they are missing from its vocabulary.
REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
||||
|
||||
@pytest.fixture(scope="module")
def seedoss_tokenizer():
    """Tokenizer with the SeedOSS thinking delimiters registered."""
    tok = AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
    # Register both custom delimiters once; skip when the vocabulary
    # already knows the start token.
    if start_token not in tok.get_vocab():
        tok.add_tokens([start_token, end_token])
    return tok
|
||||
|
||||
# Each case maps raw model output to the expected parser results.
SIMPLE_REASONING: dict[str, Any] = {
    "output": "This is a reasoning section</seed:think>This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
# Reasoning followed by no content after the end token.
COMPLETE_REASONING: dict[str, Any] = {
    "output": "This is a reasoning section</seed:think>",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": True,
}
# No end token at all: the whole output counts as reasoning.
NO_CONTENT: dict[str, Any] = {
    "output": "This is content",
    "reasoning": "This is content",
    "content": None,
    "is_reasoning_end": False,
}
NO_REASONING_STREAMING: dict[str, Any] = {
    "output": "This is a reasoning section",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": False,
}
MULTIPLE_LINES: dict[str, Any] = {
    "output": "This\nThat</seed:think>This is the rest\nThat",
    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
    "is_reasoning_end": True,
}
# Both delimiters present (the start token is optional in these cases).
WITH_START_TOKEN: dict[str, Any] = {
    "output": ("<seed:think>This is a reasoning section</seed:think>This is the rest"),
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
# Only the end token — the typical SeedOSS output shape.
ONLY_END_TOKEN: dict[str, Any] = {
    "output": "Some reasoning</seed:think>This is the rest",
    "reasoning": "Some reasoning",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
NO_TOKENS: dict[str, Any] = {
    "output": "This is just content without any reasoning tokens",
    "reasoning": "This is just content without any reasoning tokens",
    "content": None,
    "is_reasoning_end": False,
}
|
||||
|
||||
def test_seedoss_reasoning_parser_creation(seedoss_tokenizer):
    """The parser is registered under `seed_oss` and exposes its delimiters."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    assert isinstance(parser, ReasoningParser)
    assert parser.start_token == start_token
    assert parser.end_token == end_token
|
||||
|
||||
@pytest.mark.parametrize("streaming", [True, False])
def test_simple_reasoning(seedoss_tokenizer, streaming):
    """Basic split: reasoning before </seed:think>, content after it."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, SIMPLE_REASONING["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        SIMPLE_REASONING["reasoning"],
        SIMPLE_REASONING["content"],
    )
|
||||
|
||||
@pytest.mark.parametrize("streaming", [True, False])
def test_complete_reasoning(seedoss_tokenizer, streaming):
    """No content after the reasoning section yields content == None."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, COMPLETE_REASONING["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        COMPLETE_REASONING["reasoning"],
        COMPLETE_REASONING["content"],
    )
|
||||
|
||||
@pytest.mark.parametrize("streaming", [True, False])
def test_no_content(seedoss_tokenizer, streaming):
    """Without an end token the entire output is reasoning content."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, NO_CONTENT["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        NO_CONTENT["reasoning"],
        NO_CONTENT["content"],
    )
|
||||
|
||||
@pytest.mark.parametrize("streaming", [True, False])
def test_multiple_lines(seedoss_tokenizer, streaming):
    """Newlines inside reasoning and content survive extraction."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, MULTIPLE_LINES["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        MULTIPLE_LINES["reasoning"],
        MULTIPLE_LINES["content"],
    )
|
||||
|
||||
@pytest.mark.parametrize("streaming", [True, False])
def test_with_start_token(seedoss_tokenizer, streaming):
    """Both start and end tokens present: the start token is stripped too."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, WITH_START_TOKEN["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        WITH_START_TOKEN["reasoning"],
        WITH_START_TOKEN["content"],
    )
|
||||
|
||||
@pytest.mark.parametrize("streaming", [True, False])
def test_only_end_token(seedoss_tokenizer, streaming):
    """Only the end token present — the typical SeedOSS output shape."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, ONLY_END_TOKEN["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        ONLY_END_TOKEN["reasoning"],
        ONLY_END_TOKEN["content"],
    )
|
||||
|
||||
@pytest.mark.parametrize("streaming", [True, False])
def test_no_tokens(seedoss_tokenizer, streaming):
    """Plain text without any delimiters is all reasoning, no content."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, NO_TOKENS["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        NO_TOKENS["reasoning"],
        NO_TOKENS["content"],
    )
|
||||
|
||||
def test_is_reasoning_end(seedoss_tokenizer):
    """is_reasoning_end flags token sequences containing the end-token id."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    end_id = parser.end_token_id

    # End token anywhere in the sequence -> reasoning has finished.
    assert parser.is_reasoning_end([1, 2, end_id, 4]) is True
    # No end token -> still inside the reasoning section.
    assert parser.is_reasoning_end([1, 2, 3, 4]) is False
|
||||
|
||||
def test_extract_content_ids(seedoss_tokenizer):
    """extract_content_ids returns only the ids strictly after the end token."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    end_id = parser.end_token_id

    # End token mid-sequence: the tail is content.
    assert parser.extract_content_ids([1, 2, end_id, 4, 5]) == [4, 5]
    # End token last: nothing follows it.
    assert parser.extract_content_ids([1, 2, 3, end_id]) == []
    # No end token at all: no content.
    assert parser.extract_content_ids([1, 2, 3, 4]) == []
|
||||
|
||||
def test_streaming_delta_processing(seedoss_tokenizer):
    """Small streamed deltas are stitched into full reasoning and content."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    chunks = ["Some ", "reasoning ", "content", "</seed:think>", "Final ", "answer"]

    reasoning, content = run_reasoning_extraction(parser, chunks, streaming=True)

    assert reasoning == "Some reasoning content"
    assert content == "Final answer"
160
tests/reasoning/utils.py
Normal file
160
tests/reasoning/utils.py
Normal file
@@ -0,0 +1,160 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
|
||||
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
|
||||
from vllm.reasoning import ReasoningParser
|
||||
from vllm.tokenizers.mistral import MistralTokenizer
|
||||
|
||||
|
||||
class StreamingReasoningReconstructor:
    """Accumulates streamed DeltaMessages back into full strings."""

    def __init__(self):
        # None (not "") so callers can distinguish "never emitted" from empty.
        self.reasoning = None
        self.other_content = None

    def append_delta(self, delta: DeltaMessage):
        """Fold one delta into the running reasoning/content buffers."""
        # A single delta must carry either content or reasoning, never both.
        assert delta.content is None or delta.reasoning is None, (
            "Both content and reasoning content are present in the delta message"
        )
        assert delta.reasoning == delta.reasoning_content, (
            "reasoning_content should be present for backwards compatibility"
        )
        if delta.content is not None:
            self.other_content = (
                delta.content
                if self.other_content is None
                else self.other_content + delta.content
            )
        else:
            self.reasoning = (
                delta.reasoning
                if self.reasoning is None
                else self.reasoning + delta.reasoning
            )
|
||||
|
||||
def run_reasoning_extraction(
    reasoning_parser: ReasoningParser,
    model_output: list[str],
    request: ChatCompletionRequest | None = None,
    streaming: bool = False,
) -> tuple[str | None, str | None]:
    """Extract (reasoning, content) via the streaming or one-shot path."""
    if not streaming:
        return run_reasoning_extraction_nonstreaming(
            reasoning_parser, model_output, request
        )
    reconstructor = run_reasoning_extraction_streaming(
        reasoning_parser,
        model_output,
        request,
    )
    # Normalize empty streamed content to None so both paths agree.
    return reconstructor.reasoning, reconstructor.other_content or None
|
||||
|
||||
def run_reasoning_extraction_mistral(
    reasoning_parser: ReasoningParser,
    model_output: list[int],
    request: ChatCompletionRequest | None = None,
    streaming: bool = False,
) -> tuple[str | None, str | None]:
    """Token-id variant of run_reasoning_extraction for MistralTokenizer."""
    assert isinstance(reasoning_parser.model_tokenizer, MistralTokenizer), type(
        reasoning_parser.model_tokenizer
    )
    if streaming:
        rec = run_reasoning_extraction_streaming_mistral(
            reasoning_parser,
            model_output,
            request,
        )
        # Normalize empty streamed content to None so both paths agree.
        return rec.reasoning, rec.other_content or None
    # Non-streaming: decode the ids to token strings and reuse the
    # string-based one-shot path.
    str_output = reasoning_parser.model_tokenizer.convert_ids_to_tokens(model_output)
    return run_reasoning_extraction_nonstreaming(
        reasoning_parser, str_output, request
    )
|
||||
|
||||
def run_reasoning_extraction_nonstreaming(
    reasoning_parser: ReasoningParser,
    model_output: list[str],
    request: ChatCompletionRequest | None = None,
) -> tuple[str | None, str | None]:
    """Join the output chunks and run the parser's one-shot extraction."""
    if request is None:
        # Minimal request object; the parser only needs its presence.
        request = ChatCompletionRequest(messages=[], model="test-model")
    return reasoning_parser.extract_reasoning(
        model_output="".join(model_output), request=request
    )
|
||||
|
||||
def run_reasoning_extraction_streaming(
    reasoning_parser: ReasoningParser,
    model_deltas: list[str],
    request: ChatCompletionRequest | None = None,
) -> StreamingReasoningReconstructor:
    """Feed string deltas through the parser's streaming API one at a time."""
    if request is None:
        request = ChatCompletionRequest(messages=[], model="test-model")
    reconstructor = StreamingReasoningReconstructor()
    prev_text = ""
    prev_tokens: list[int] = []
    for delta in model_deltas:
        # Map the delta to vocab ids; tokens missing from the vocab are
        # silently dropped, matching how the parser sees real streams.
        token_delta = [
            reasoning_parser.vocab.get(tok)
            for tok in reasoning_parser.model_tokenizer.tokenize(delta)
            if tok in reasoning_parser.vocab
        ]
        cur_text = prev_text + delta
        cur_tokens = prev_tokens + token_delta
        msg = reasoning_parser.extract_reasoning_streaming(
            prev_text,
            cur_text,
            delta,
            prev_tokens,
            cur_tokens,
            token_delta,
        )
        # The parser may buffer and emit nothing for a given delta.
        if msg is not None:
            reconstructor.append_delta(msg)
        prev_text, prev_tokens = cur_text, cur_tokens
    return reconstructor
|
||||
|
||||
def run_reasoning_extraction_streaming_mistral(
    reasoning_parser: ReasoningParser,
    model_deltas: list[int],
    request: ChatCompletionRequest | None = None,
) -> StreamingReasoningReconstructor:
    """Feed token-id deltas (one id at a time) through the streaming API."""
    assert isinstance(reasoning_parser.model_tokenizer, MistralTokenizer), type(
        reasoning_parser.model_tokenizer
    )
    if request is None:
        request = ChatCompletionRequest(messages=[], model="test-model")
    reconstructor = StreamingReasoningReconstructor()
    prev_text = ""
    prev_tokens: list[int] = []
    for token_id in model_deltas:
        token_delta = [token_id]
        # Decode the single id to its token string for the text-side args.
        delta = reasoning_parser.model_tokenizer.convert_ids_to_tokens([token_id])[0]
        cur_text = prev_text + delta
        cur_tokens = prev_tokens + token_delta
        msg = reasoning_parser.extract_reasoning_streaming(
            prev_text,
            cur_text,
            delta,
            prev_tokens,
            cur_tokens,
            token_delta,
        )
        # The parser may buffer and emit nothing for a given delta.
        if msg is not None:
            reconstructor.append_delta(msg)
        prev_text, prev_tokens = cur_text, cur_tokens
    return reconstructor
|
||||
Reference in New Issue
Block a user