Sync from v0.13

This commit is contained in:
2026-01-19 10:38:50 +08:00
parent b2ef04d792
commit 5aef6c175a
3714 changed files with 854317 additions and 89342 deletions

View File

View File

@@ -0,0 +1,421 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
# Create a concrete test implementation of BaseThinkingReasoningParser
class TestThinkingReasoningParser(BaseThinkingReasoningParser):
    """Concrete test implementation of BaseThinkingReasoningParser.

    Uses the custom "<test:think>"/"</test:think>" delimiters that the
    ``test_tokenizer`` fixture registers in the tokenizer vocabulary.
    """

    @property
    def start_token(self) -> str:
        # Opening delimiter of the reasoning section.
        return "<test:think>"

    @property
    def end_token(self) -> str:
        # Closing delimiter of the reasoning section.
        return "</test:think>"
class TestThinkingReasoningParserAlt(BaseThinkingReasoningParser):
    """Alternative test implementation with different delimiter tokens.

    Exists so tests can verify that two parser subclasses with distinct
    tokens operate independently of each other.
    """

    @property
    def start_token(self) -> str:
        return "<alt:start>"

    @property
    def end_token(self) -> str:
        return "<alt:end>"
# Use a test model
REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
@pytest.fixture(scope="module")
def test_tokenizer():
    """Module-scoped tokenizer with the custom test think-tokens registered."""
    tok = AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
    vocab = tok.get_vocab()
    # Only register delimiters the vocabulary does not already contain.
    missing = [
        t
        for t in ("<test:think>", "</test:think>", "<alt:start>", "<alt:end>")
        if t not in vocab
    ]
    if missing:
        tok.add_tokens(missing)
    return tok
class TestBaseThinkingReasoningParserInit:
    """
    Test initialization and basic properties of
    BaseThinkingReasoningParser.
    """

    def test_successful_initialization(self, test_tokenizer):
        """Test successful initialization with valid tokens."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        assert parser.start_token == "<test:think>"
        assert parser.end_token == "</test:think>"
        # Ids resolve because the fixture added both tokens to the vocab.
        assert parser.start_token_id is not None
        assert parser.end_token_id is not None

    def test_initialization_with_missing_tokenizer(self):
        """Test that initialization fails without tokenizer."""
        with pytest.raises(ValueError, match="model tokenizer must be passed"):
            TestThinkingReasoningParser(None)

    def test_initialization_with_missing_tokens(self, test_tokenizer):
        """Test that initialization fails when tokens are not in vocabulary."""

        # Parser whose delimiters were never added to the tokenizer vocab.
        class MissingTokenParser(BaseThinkingReasoningParser):
            @property
            def start_token(self) -> str:
                return "<missing:start>"

            @property
            def end_token(self) -> str:
                return "<missing:end>"

        with pytest.raises(
            RuntimeError, match="could not locate think start/end tokens"
        ):
            MissingTokenParser(test_tokenizer)

    def test_initialization_with_empty_tokens(self, test_tokenizer):
        """Test that initialization fails with empty token strings."""

        class EmptyTokenParser(BaseThinkingReasoningParser):
            @property
            def start_token(self) -> str:
                return ""

            @property
            def end_token(self) -> str:
                return ""

        with pytest.raises(
            ValueError, match="start_token and end_token must be defined"
        ):
            EmptyTokenParser(test_tokenizer)
class TestBaseThinkingReasoningParserMethods:
    """Test the methods of BaseThinkingReasoningParser."""

    def test_is_reasoning_end(self, test_tokenizer):
        """Test the is_reasoning_end method."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        end_token_id = parser.end_token_id
        start_token_id = parser.start_token_id
        # Test with end token present
        assert parser.is_reasoning_end([1, 2, end_token_id, 4]) is True
        # Test without end token
        assert parser.is_reasoning_end([1, 2, 3, 4]) is False
        # Test with empty list
        assert parser.is_reasoning_end([]) is False
        # Interleaved thinking: reasoning only counts as ended when the most
        # recent start token is followed by a matching end token.
        assert parser.is_reasoning_end([1, start_token_id, 2, end_token_id]) is True
        assert parser.is_reasoning_end([1, start_token_id, 2, 3]) is False
        assert (
            parser.is_reasoning_end(
                [1, start_token_id, 2, end_token_id, 2, 2, start_token_id]
            )
            is False
        )

    def test_is_reasoning_end_streaming(self, test_tokenizer):
        """Test the is_reasoning_end_streaming method."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        end_token_id = parser.end_token_id
        start_token_id = parser.start_token_id
        assert (
            parser.is_reasoning_end_streaming([1, 2, end_token_id], [end_token_id])
            is True
        )
        assert parser.is_reasoning_end_streaming([1, 2, 3, 4], [4]) is False
        assert parser.is_reasoning_end_streaming([], []) is False
        assert (
            parser.is_reasoning_end_streaming(
                [1, start_token_id, 2, end_token_id], [end_token_id]
            )
            is True
        )
        assert (
            parser.is_reasoning_end_streaming([1, start_token_id, 2, 3], [3]) is False
        )
        assert (
            parser.is_reasoning_end_streaming(
                [1, start_token_id, 2, end_token_id, 2, start_token_id, 2],
                [2],
            )
            is False
        )
        assert (
            parser.is_reasoning_end_streaming(
                [1, start_token_id, 2, end_token_id, 2, 2], [2]
            )
            is False
        )

    def test_extract_content_ids(self, test_tokenizer):
        """Test the extract_content_ids method."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        end_token_id = parser.end_token_id
        # End token in the middle: everything after it is content.
        input_ids = [1, 2, end_token_id, 4, 5]
        content_ids = parser.extract_content_ids(input_ids)
        assert content_ids == [4, 5]
        # End token as the last element: nothing follows, so no content.
        input_ids = [1, 2, 3, end_token_id]
        content_ids = parser.extract_content_ids(input_ids)
        assert content_ids == []
        # No end token at all: no content is extracted.
        input_ids = [1, 2, 3, 4]
        content_ids = parser.extract_content_ids(input_ids)
        assert content_ids == []
        # NOTE(review): the original repeated the trailing-end-token case a
        # second time under a different comment; the duplicate was removed.
class TestBaseThinkingReasoningParserExtraction:
    """Test reasoning content extraction methods."""

    def test_extract_reasoning_with_both_tokens(self, test_tokenizer):
        """Test extraction when both start and end tokens are present."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        request = ChatCompletionRequest(messages=[], model="test-model")
        model_output = "<test:think>This is reasoning</test:think>This is content"
        reasoning, content = parser.extract_reasoning(model_output, request)
        assert reasoning == "This is reasoning"
        assert content == "This is content"

    def test_extract_reasoning_only_end_token(self, test_tokenizer):
        """Test extraction when only end token is present."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        request = ChatCompletionRequest(messages=[], model="test-model")
        # Start token may be absent, e.g. forced by the chat template.
        model_output = "This is reasoning</test:think>This is content"
        reasoning, content = parser.extract_reasoning(model_output, request)
        assert reasoning == "This is reasoning"
        assert content == "This is content"

    def test_extract_reasoning_no_end_token(self, test_tokenizer):
        """Test extraction when no end token is present."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        request = ChatCompletionRequest(messages=[], model="test-model")
        # Without an end token, the entire output is treated as reasoning.
        model_output = "This is just content"
        reasoning, content = parser.extract_reasoning(model_output, request)
        assert reasoning == "This is just content"
        assert content is None

    def test_extract_reasoning_empty_output(self, test_tokenizer):
        """Test extraction with empty output."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        request = ChatCompletionRequest(messages=[], model="test-model")
        model_output = ""
        reasoning, content = parser.extract_reasoning(model_output, request)
        assert reasoning == ""
        assert content is None

    def test_extract_reasoning_only_tokens(self, test_tokenizer):
        """Test extraction with only tokens and no content."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        request = ChatCompletionRequest(messages=[], model="test-model")
        model_output = "<test:think></test:think>"
        reasoning, content = parser.extract_reasoning(model_output, request)
        assert reasoning == ""
        assert content is None
class TestBaseThinkingReasoningParserStreaming:
    """Test streaming functionality of BaseThinkingReasoningParser."""

    @pytest.mark.parametrize("streaming", [True, False])
    def test_simple_reasoning_extraction(self, test_tokenizer, streaming):
        """
        Test basic reasoning extraction in both
        streaming and non-streaming modes.
        """
        parser = TestThinkingReasoningParser(test_tokenizer)
        model_output = [
            "<test:think>",
            "Some ",
            "reasoning ",
            "content",
            "</test:think>",
            "Final ",
            "answer",
        ]
        reasoning, content = run_reasoning_extraction(
            parser, model_output, streaming=streaming
        )
        assert reasoning == "Some reasoning content"
        assert content == "Final answer"

    def test_streaming_with_incremental_deltas(self, test_tokenizer):
        """Test streaming processing with small incremental deltas."""
        # NOTE(review): same deltas as test_simple_reasoning_extraction's
        # streaming case; retained for parity with the original suite.
        parser = TestThinkingReasoningParser(test_tokenizer)
        deltas = [
            "<test:think>",
            "Some ",
            "reasoning ",
            "content",
            "</test:think>",
            "Final ",
            "answer",
        ]
        reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)
        assert reasoning == "Some reasoning content"
        assert content == "Final answer"

    def test_streaming_with_start_token(self, test_tokenizer):
        """Test streaming with start token included."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        deltas = [
            "<test:think>",
            "Some ",
            "reasoning",
            "</test:think>",
            "Answer",
        ]
        reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)
        assert reasoning == "Some reasoning"
        assert content == "Answer"

    def test_streaming_no_end_token(self, test_tokenizer):
        """Test streaming when no end token is encountered."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        deltas = [
            "<test:think>",
            "Some ",
            "reasoning ",
            "without ",
            "end",
        ]
        reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)
        # With no end token, everything streamed remains reasoning.
        assert reasoning == "Some reasoning without end"
        assert content is None

    def test_streaming_only_end_token(self, test_tokenizer):
        """Test streaming when only end token appears."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        deltas = [
            "<test:think>",
            "Reasoning ",
            "content",
            "</test:think>",
            "Final",
        ]
        reasoning, content = run_reasoning_extraction(parser, deltas, streaming=True)
        assert reasoning == "Reasoning content"
        assert content == "Final"
class TestBaseThinkingReasoningParserMultipleImplementations:
    """
    Test that multiple implementations of
    BaseThinkingReasoningParser work correctly.
    """

    def test_different_token_implementations(self, test_tokenizer):
        """
        Test that different implementations
        with different tokens work independently.
        """
        parser1 = TestThinkingReasoningParser(test_tokenizer)
        parser2 = TestThinkingReasoningParserAlt(test_tokenizer)
        # Test parser1 (end token only; start may be implicit)
        model_output1 = "Reasoning1</test:think>Content1"
        reasoning1, content1 = run_reasoning_extraction(parser1, [model_output1])
        assert reasoning1 == "Reasoning1"
        assert content1 == "Content1"
        # Test parser2 with its own delimiter set
        model_output2 = "Reasoning2<alt:end>Content2"
        reasoning2, content2 = run_reasoning_extraction(parser2, [model_output2])
        assert reasoning2 == "Reasoning2"
        assert content2 == "Content2"
        # Verify the two implementations use distinct tokens and ids
        assert parser1.start_token != parser2.start_token
        assert parser1.end_token != parser2.end_token
        assert parser1.start_token_id != parser2.start_token_id
        assert parser1.end_token_id != parser2.end_token_id
class TestBaseThinkingReasoningParserEdgeCases:
    """Test edge cases and error conditions."""

    def test_multiple_end_tokens(self, test_tokenizer):
        """Test behavior with multiple end tokens."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        model_output = "First</test:think>Middle</test:think>Last"
        reasoning, content = run_reasoning_extraction(parser, [model_output])
        # Should stop at first end token; the second one is plain content.
        assert reasoning == "First"
        assert content == "Middle</test:think>Last"

    def test_nested_tokens(self, test_tokenizer):
        """Test behavior with nested-like token patterns."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        model_output = "<test:think>Outer<test:think>Inner</test:think>Content"
        reasoning, content = run_reasoning_extraction(parser, [model_output])
        # Should process normally, start from first start token; the inner
        # start token is kept verbatim inside the reasoning text.
        assert reasoning == "Outer<test:think>Inner"
        assert content == "Content"

    def test_malformed_tokens(self, test_tokenizer):
        """Test behavior with malformed token-like strings."""
        parser = TestThinkingReasoningParser(test_tokenizer)
        model_output = "<test:thinking>Not a real token</test:thinking>Content"
        reasoning, content = run_reasoning_extraction(parser, [model_output])
        # Should treat as regular content since tokens don't match exactly
        assert reasoning == ("<test:thinking>Not a real token</test:thinking>Content")
        assert content is None

View File

@@ -0,0 +1,288 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.reasoning import ReasoningParser, ReasoningParserManager
parser_name = "deepseek_r1"
start_token = "<think>"
end_token = "</think>"
REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
@pytest.fixture(scope="module")
def deepseek_r1_qwen_tokenizer():
    """Module-scoped HF tokenizer for the DeepSeek-R1 distilled Qwen model."""
    tokenizer = AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
    return tokenizer
SIMPLE_REASONING = {
"output": "This is a reasoning section</think>This is the rest",
"reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
COMPLETE_REASONING = {
"output": "This is a reasoning section</think>",
"reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": True,
}
NO_CONTENT = {
"output": "This is content",
"reasoning": "This is content",
"content": None,
"is_reasoning_end": False,
}
NO_REASONING_STREAMING = {
"output": "This is a reasoning section",
"reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": False,
}
MULTIPLE_LINES = {
"output": "This\nThat</think>This is the rest\nThat",
"reasoning": "This\nThat",
"content": "This is the rest\nThat",
"is_reasoning_end": True,
}
SHORTEST_REASONING_NO_STREAMING = {
"output": "</think>This is the rest",
"reasoning": "",
"content": "This is the rest",
"is_reasoning_end": True,
}
SHORTEST_REASONING = {
"output": "</think>This is the rest",
"reasoning": None,
"content": "This is the rest",
"is_reasoning_end": True,
}
REASONING_WITH_THINK = {
"output": "<think>This is a reasoning section</think>This is the rest",
"reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
COMPLETE_REASONING_WITH_THINK = {
"output": "<think>This is a reasoning section</think>",
"reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": True,
}
MULTIPLE_LINES_WITH_THINK = {
"output": "<think>This\nThat</think>This is the rest\nThat",
"reasoning": "This\nThat",
"content": "This is the rest\nThat",
"is_reasoning_end": True,
}
SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
"output": "</think>This is the rest",
"reasoning": "",
"content": "This is the rest",
"is_reasoning_end": True,
}
SHORTEST_REASONING_WITH_THINK = {
"output": "</think>This is the rest",
"reasoning": None,
"content": "This is the rest",
"is_reasoning_end": True,
}
THINK_NO_END = {
"output": "<think>This is a reasoning section",
"reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": False,
}
EMPTY = {
"output": "",
"reasoning": "",
"content": None,
"is_reasoning_end": False,
}
EMPTY_STREAMING = {
"output": "",
"reasoning": None,
"content": None,
"is_reasoning_end": False,
}
NEW_LINE = {
"output": "\n<think>This is a reasoning section</think>\nThis is the rest",
"reasoning": "This is a reasoning section",
"content": "\nThis is the rest",
"is_reasoning_end": True,
}
# Streaming cannot handle new lines at the beginning of the output
# because we need to support <think>...</think> and </think>...
# We cannot know if the text before <think> is reasoning content
# or not.
NEW_LINE_STREAMING = {
"output": "\n<think>This is a reasoning section</think>\nThis is the rest",
"reasoning": "\nThis is a reasoning section",
"content": "\nThis is the rest",
"is_reasoning_end": True,
}
TEST_CASES = [
pytest.param(
False,
SIMPLE_REASONING,
id="simple_reasoning",
),
pytest.param(
True,
SIMPLE_REASONING,
id="simple_reasoning_streaming",
),
pytest.param(
False,
COMPLETE_REASONING,
id="complete_reasoning",
),
pytest.param(
True,
COMPLETE_REASONING,
id="complete_reasoning_streaming",
),
pytest.param(
False,
NO_CONTENT,
id="no_content_token",
),
pytest.param(
True,
NO_REASONING_STREAMING,
id="no_reasoning_token_streaming",
),
pytest.param(
False,
MULTIPLE_LINES,
id="multiple_lines",
),
pytest.param(
True,
MULTIPLE_LINES,
id="multiple_lines_streaming",
),
pytest.param(
True,
SHORTEST_REASONING,
id="shortest",
),
pytest.param(
False,
SHORTEST_REASONING_NO_STREAMING,
id="shortest_streaming",
),
pytest.param(
False,
REASONING_WITH_THINK,
id="reasoning_with_think",
),
pytest.param(
True,
REASONING_WITH_THINK,
id="reasoning_with_think_streaming",
),
pytest.param(
False,
COMPLETE_REASONING_WITH_THINK,
id="complete_reasoning_with_think",
),
pytest.param(
True,
COMPLETE_REASONING_WITH_THINK,
id="complete_reasoning_with_think_streaming",
),
pytest.param(
False,
MULTIPLE_LINES_WITH_THINK,
id="multiple_lines_with_think",
),
pytest.param(
True,
MULTIPLE_LINES_WITH_THINK,
id="multiple_lines_with_think_streaming",
),
pytest.param(
False,
SHORTEST_REASONING_NO_STREAMING_WITH_THINK,
id="shortest_with_think",
),
pytest.param(
True,
SHORTEST_REASONING_WITH_THINK,
id="shortest_with_think_streaming",
),
pytest.param(
False,
THINK_NO_END,
id="think_no_end",
),
pytest.param(
True,
THINK_NO_END,
id="think_no_end_streaming",
),
pytest.param(
False,
EMPTY,
id="empty",
),
pytest.param(
True,
EMPTY_STREAMING,
id="empty_streaming",
),
pytest.param(
False,
NEW_LINE,
id="new_line",
),
pytest.param(
True,
NEW_LINE_STREAMING,
id="new_line_streaming",
),
]
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    deepseek_r1_qwen_tokenizer,
):
    """End-to-end check of the deepseek_r1 reasoning parser.

    Verifies extracted reasoning/content text, ``is_reasoning_end``, and
    ``extract_content_ids`` against the expectations in ``param_dict``.
    """
    output = deepseek_r1_qwen_tokenizer.tokenize(param_dict["output"])
    # decode everything to tokens so streaming mode sees realistic deltas
    output_tokens: list[str] = [
        deepseek_r1_qwen_tokenizer.convert_tokens_to_string([token]) for token in output
    ]
    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        deepseek_r1_qwen_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, output_tokens, streaming=streaming
    )
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
    # Test is_reasoning_end
    output_ids = deepseek_r1_qwen_tokenizer.convert_tokens_to_ids(output)
    is_reasoning_end = parser.is_reasoning_end(output_ids)
    assert is_reasoning_end == param_dict["is_reasoning_end"]
    # Test extract_content_ids. BUG FIX: the original passed the token
    # *strings* (``output``) in the no-content branch, but the method takes
    # token ids; pass ``output_ids`` in both branches. Also use a distinct
    # name instead of shadowing ``content`` above.
    content_ids = parser.extract_content_ids(output_ids)
    if param_dict["content"] is not None:
        assert content_ids == deepseek_r1_qwen_tokenizer.convert_tokens_to_ids(
            deepseek_r1_qwen_tokenizer.tokenize(param_dict["content"])
        )
    else:
        assert content_ids == []

View File

@@ -0,0 +1,75 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
from vllm.reasoning.deepseek_v3_reasoning_parser import DeepSeekV3ReasoningParser
from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser
REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-V3.1"
@pytest.fixture(scope="module")
def tokenizer():
    """Module-scoped HF tokenizer for the DeepSeek-V3.1 model."""
    tok = AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
    return tok
@pytest.mark.parametrize(
    "thinking,expected_parser_type",
    [
        (True, DeepSeekR1ReasoningParser),
        (False, IdentityReasoningParser),
    ],
)
def test_parser_selection(tokenizer, thinking, expected_parser_type):
    """V3 parser delegates to R1 parsing when thinking=True, else identity."""
    parser = DeepSeekV3ReasoningParser(
        tokenizer, chat_template_kwargs={"thinking": thinking}
    )
    # The delegate is stored on the private ``_parser`` attribute.
    assert isinstance(parser._parser, expected_parser_type)
def test_identity_reasoning_parser_basic(tokenizer):
    """IdentityReasoningParser passes output through with no reasoning split."""
    parser = IdentityReasoningParser(tokenizer)
    # Test is_reasoning_end always returns True
    input_text = "This is some output"
    input_tokens = tokenizer.tokenize(input_text)
    input_ids = tokenizer.convert_tokens_to_ids(input_tokens)
    assert parser.is_reasoning_end(input_ids) is True
    assert parser.is_reasoning_end_streaming(input_ids, input_ids) is True
    # Test extract_content_ids returns all input_ids
    assert parser.extract_content_ids(input_ids) == input_ids
    # Test extract_reasoning returns (None, model_output)
    request = ChatCompletionRequest(model="test-model", messages=[], temperature=1.0)
    reasoning, content = parser.extract_reasoning(input_text, request)
    assert reasoning is None
    assert content == input_text
    # Test extract_reasoning_streaming returns DeltaMessage or None
    result = parser.extract_reasoning_streaming(
        previous_text="",
        current_text="Hello world",
        delta_text="Hello world",
        previous_token_ids=[],
        current_token_ids=input_ids,
        delta_token_ids=input_ids,
    )
    assert isinstance(result, DeltaMessage)
    assert result.content == "Hello world"
    # If delta_text is empty, should return None
    result_none = parser.extract_reasoning_streaming(
        previous_text="Hello world",
        current_text="Hello world",
        delta_text="",
        previous_token_ids=input_ids,
        current_token_ids=input_ids,
        delta_token_ids=[],
    )
    assert result_none is None

View File

@@ -0,0 +1,124 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.reasoning import ReasoningParser, ReasoningParserManager
parser_name = "ernie45"
REASONING_MODEL_NAME = "baidu/ERNIE-4.5-21B-A3B-Thinking"
@pytest.fixture(scope="module")
def ernie45_tokenizer():
    """Module-scoped HF tokenizer for the ERNIE-4.5 thinking model."""
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
# With </think>, non-streaming
WITH_THINK = {
"output": "abc</think>def",
"reasoning": "abc",
"content": "def",
}
# With </think>, streaming
WITH_THINK_STREAM = {
"output": "abc</think>def",
"reasoning": "abc",
"content": "def",
}
# without </think>, all is reasoning
WITHOUT_THINK = {
"output": "abc",
"reasoning": "abc",
"content": None,
}
# without </think>, all is reasoning
WITHOUT_THINK_STREAM = {
"output": "abc",
"reasoning": "abc",
"content": None,
}
COMPLETE_REASONING = {
"output": "abc</think>",
"reasoning": "abc",
"content": None,
}
MULTILINE_REASONING = {
"output": "abc\nABC</think>def\nDEF",
"reasoning": "abc\nABC",
"content": "def\nDEF",
}
TEST_CASES = [
pytest.param(
False,
WITH_THINK,
id="with_think",
),
pytest.param(
True,
WITH_THINK_STREAM,
id="with_think_stream",
),
pytest.param(
False,
WITHOUT_THINK,
id="without_think",
),
pytest.param(
True,
WITHOUT_THINK_STREAM,
id="without_think_stream",
),
pytest.param(
False,
COMPLETE_REASONING,
id="complete_reasoning",
),
pytest.param(
True,
COMPLETE_REASONING,
id="complete_reasoning_stream",
),
pytest.param(
False,
MULTILINE_REASONING,
id="multiline_reasoning",
),
pytest.param(
True,
MULTILINE_REASONING,
id="multiline_reasoning_stream",
),
]
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    ernie45_tokenizer,
):
    """Check ernie45 reasoning extraction in streaming and non-streaming modes."""
    output = ernie45_tokenizer.tokenize(param_dict["output"])
    # Decode token-by-token; drop tokens that decode to an empty string so
    # streaming mode never receives empty deltas.
    output_tokens: list[str] = []
    for token in output:
        one_token = ernie45_tokenizer.convert_tokens_to_string([token])
        if one_token:
            output_tokens.append(one_token)
    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        ernie45_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, output_tokens, streaming=streaming
    )
    # Removed a stray debug ``print()`` that was left in the original.
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]

View File

@@ -0,0 +1,205 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.reasoning import ReasoningParser, ReasoningParserManager
parser_name = "glm45"
start_token = "<think>"
end_token = "</think>"
REASONING_MODEL_NAME = "zai-org/GLM-4.5"
@pytest.fixture(scope="module")
def glm45_tokenizer():
    """Module-scoped HF tokenizer for the GLM-4.5 model."""
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
WITH_THINK = {
"output": "<think>This is a reasoning section</think>This is the rest",
"reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
WITH_THINK_STREAM = {
"output": "<think>This is a reasoning section</think>This is the rest",
"reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
WITHOUT_THINK = {
"output": "This is the rest",
"reasoning": None,
"content": "This is the rest",
"is_reasoning_end": False,
}
WITHOUT_THINK_STREAM = {
"output": "This is the rest",
"reasoning": None,
"content": "This is the rest",
"is_reasoning_end": False,
}
COMPLETE_REASONING = {
"output": "<think>This is a reasoning section</think>",
"reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": True,
}
MULTILINE_REASONING = {
"output": "<think>This is a reasoning\nsection</think>This is the rest\nThat",
"reasoning": "This is a reasoning\nsection",
"content": "This is the rest\nThat",
"is_reasoning_end": True,
}
ONLY_OPEN_TAG = {
"output": "<think>This is a reasoning section",
"reasoning": None,
"content": "<think>This is a reasoning section",
"is_reasoning_end": False,
}
ONLY_OPEN_TAG_STREAM = {
"output": "<think>This is a reasoning section",
"reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": False,
}
TEST_CASES = [
pytest.param(
False,
WITH_THINK,
id="with_think",
),
pytest.param(
True,
WITH_THINK_STREAM,
id="with_think_stream",
),
pytest.param(
False,
WITHOUT_THINK,
id="without_think",
),
pytest.param(
True,
WITHOUT_THINK_STREAM,
id="without_think_stream",
),
pytest.param(
False,
COMPLETE_REASONING,
id="complete_reasoning",
),
pytest.param(
True,
COMPLETE_REASONING,
id="complete_reasoning_stream",
),
pytest.param(
False,
MULTILINE_REASONING,
id="multiline_reasoning",
),
pytest.param(
True,
MULTILINE_REASONING,
id="multiline_reasoning_stream",
),
pytest.param(
False,
ONLY_OPEN_TAG,
id="only_open_tag",
),
pytest.param(
True,
ONLY_OPEN_TAG_STREAM,
id="only_open_tag_stream",
),
]
STILL_REASONING_PROMPT = """[gMASK]<sop><|system|>
You are a helpful assistant.<|user|>
What is the capital of France?<|assistant|>
<think>The user is asking for the capital of"""
DONE_REASONING_PROMPT = """[gMASK]<sop><|system|>
You are a helpful assistant.<|user|>
What is the capital of France?<|assistant|>
<think>The user is asking for the capital of France.</think>
The capital of France is Paris."""
MULTI_TURN_STILL_REASONING_PROMPT = """[gMASK]<sop><|system|>
You are a helpful assistant.<|user|>
What is the capital of France?<|assistant|>
<think></think>
The capital of France is Paris.<|user|>
What about Chile?<|assistant|>
<think>The user is asking for the capital of"""
MULTI_TURN_DONE_REASONING_PROMPT = """[gMASK]<sop><|system|>
You are a helpful assistant.<|user|>
What is the capital of France?<|assistant|>
<think></think>
The capital of France is Paris.<|user|>
What about Chile?<|assistant|>
<think>The user is asking for the capital of Chile.</think>
The capital of Chile is Santiago."""
REASONING_END_TEST_CASES = [
pytest.param(STILL_REASONING_PROMPT, False, id="still_reasoning"),
pytest.param(DONE_REASONING_PROMPT, True, id="done_reasoning"),
pytest.param(
MULTI_TURN_STILL_REASONING_PROMPT, False, id="multi_turn_still_reasoning"
),
pytest.param(
MULTI_TURN_DONE_REASONING_PROMPT, True, id="multi_turn_done_reasoning"
),
]
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    glm45_tokenizer,
):
    """Check glm45 reasoning extraction and ``is_reasoning_end``."""
    output = glm45_tokenizer.tokenize(param_dict["output"])
    # Decode token-by-token so streaming mode receives realistic deltas.
    output_tokens: list[str] = [
        glm45_tokenizer.convert_tokens_to_string([token]) for token in output
    ]
    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        glm45_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, output_tokens, streaming=streaming
    )
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
    # is_reasoning_end works on token ids, not token strings.
    output_ids = glm45_tokenizer.convert_tokens_to_ids(output)
    is_reasoning_end = parser.is_reasoning_end(output_ids)
    assert is_reasoning_end == param_dict["is_reasoning_end"]
@pytest.mark.parametrize("prompt, is_reasoning_end", REASONING_END_TEST_CASES)
def test_is_reasoning_end_full_prompt(
    prompt: str, is_reasoning_end: bool, glm45_tokenizer
):
    """``is_reasoning_end`` on full (multi-turn) chat prompts for glm45."""
    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        glm45_tokenizer
    )
    tokens = glm45_tokenizer.tokenize(prompt)
    token_ids = glm45_tokenizer.convert_tokens_to_ids(tokens)
    check_is_reasoning_end = parser.is_reasoning_end(token_ids)
    assert check_is_reasoning_end == is_reasoning_end

View File

@@ -0,0 +1,127 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from vllm.reasoning import ReasoningParser
from vllm.reasoning.gptoss_reasoning_parser import GptOssReasoningParser
REASONING_MODEL_NAME = "openai/gpt-oss-120b"
@pytest.fixture(scope="module")
def gpt_oss_tokenizer():
    """Module-scoped HF tokenizer for the gpt-oss-120b model."""
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
USER_MESSAGE_START = "<|start|>user<|message|>"
REASONING_SECTION_START = "<|end|><|start|>assistant<|channel|>analysis<|message|>"
ASSISTANT_CONTENT_START_PREFIX = "<|end|><|start|>assistant<|channel|>final"
ASSISTANT_CONTENT_START_SUFFIX = "<|message|>"
ASSISTANT_CONTENT_START = (
ASSISTANT_CONTENT_START_PREFIX + ASSISTANT_CONTENT_START_SUFFIX
)
BASIC_CONTENT = {
"output": REASONING_SECTION_START
+ "This is reasoning"
+ ASSISTANT_CONTENT_START
+ "This is the rest",
"is_reasoning_end": True,
}
BASIC_REASONING_ONLY = {
"output": REASONING_SECTION_START + "This is reasoning" + "<|end|>",
"is_reasoning_end": False,
}
BASIC_NO_REASONING_NO_ASSISTANT = {
"output": USER_MESSAGE_START + "This is a user message",
"is_reasoning_end": False,
}
# Edge-case where the model omits the assistant tag entirely.
BASIC_NO_REASONING_ASSISTANT = {
"output": USER_MESSAGE_START + "This is a user message<|end|><|channel|>final",
"is_reasoning_end": True,
}
COMPLEX_CONTENT_INCOMPLETE_PREFIX_ONLY = {
"output": REASONING_SECTION_START
+ "This is reasoning"
+ ASSISTANT_CONTENT_START_PREFIX,
"is_reasoning_end": False,
}
COMPLEX_CONTENT_SUFFIX_ONLY = {
"output": REASONING_SECTION_START
+ "This is reasoning"
+ ASSISTANT_CONTENT_START_SUFFIX,
"is_reasoning_end": False,
}
COMPLEX_CONTENT_1_NO_SUFFIX = {
"output": REASONING_SECTION_START
+ "This is reasoning"
+ ASSISTANT_CONTENT_START_PREFIX
+ "<|constrain|> JSON ",
"is_reasoning_end": False,
}
COMPLEX_CONTENT_1 = {
"output": REASONING_SECTION_START
+ "This is reasoning"
+ ASSISTANT_CONTENT_START_PREFIX
+ "<|constrain|> JSON "
+ ASSISTANT_CONTENT_START_SUFFIX,
"is_reasoning_end": True,
}
COMPLEX_CONTENT_1_WITH_CONTENT = {
"output": REASONING_SECTION_START
+ "This is reasoning"
+ ASSISTANT_CONTENT_START_PREFIX
+ "<|constrain|> JSON "
+ ASSISTANT_CONTENT_START_SUFFIX
+ "This is the rest",
"is_reasoning_end": True,
}
COMPLEX_CONTENT_2 = {
"output": REASONING_SECTION_START
+ "This is reasoning"
+ ASSISTANT_CONTENT_START_PREFIX
+ "<|constrain|>ReplyAction "
+ ASSISTANT_CONTENT_START_SUFFIX
+ "This is the rest",
"is_reasoning_end": True,
}
TEST_CASES = [
BASIC_CONTENT,
BASIC_REASONING_ONLY,
COMPLEX_CONTENT_INCOMPLETE_PREFIX_ONLY,
COMPLEX_CONTENT_SUFFIX_ONLY,
COMPLEX_CONTENT_1_NO_SUFFIX,
COMPLEX_CONTENT_1,
COMPLEX_CONTENT_1_WITH_CONTENT,
COMPLEX_CONTENT_2,
]
@pytest.mark.parametrize(
    "output, is_reasoning_end",
    [(t["output"], t["is_reasoning_end"]) for t in TEST_CASES],
)
def test_gptoss_is_reasoning_end(
    output,
    is_reasoning_end,
    gpt_oss_tokenizer,
):
    """``is_reasoning_end`` on gpt-oss channel-structured outputs."""
    # Use distinct names instead of rebinding ``output`` to the token list.
    tokens = gpt_oss_tokenizer.tokenize(output)
    token_ids = gpt_oss_tokenizer.convert_tokens_to_ids(tokens)
    parser: ReasoningParser = GptOssReasoningParser(gpt_oss_tokenizer)
    assert parser.is_reasoning_end(token_ids) == is_reasoning_end

View File

@@ -0,0 +1,344 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import DeltaMessage, run_reasoning_extraction
from vllm.reasoning import ReasoningParser, ReasoningParserManager
# Registry name of the parser under test.
parser_name = "granite"
# Granite marks the reasoning/response sections with plain-text sentinels
# rather than dedicated special tokens.
START_REASONING = "Here is my thought process:"
START_RESPONSE = "Here is my response:"
# Case: reasoning section followed by a response section.
SIMPLE_REASONING = {
    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest",  # noqa: E501
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}
# Case: output ends right after the response sentinel; no content follows.
COMPLETE_REASONING = {
    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
    "reasoning": "This is a reasoning section",
    "content": None,
}
# Case: no sentinels at all; the whole output is content.
NO_REASONING = {
    "output": "This is content",
    "reasoning": None,
    "content": "This is content",
}
# Case: newlines inside both sections must survive extraction.
MULTIPLE_LINES = {
    "output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
}
# NOTE(review): the *_WITH_THINK cases below duplicate the cases above;
# they appear kept to mirror the shared case layout of other parser tests.
REASONING_WITH_THINK = {
    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest",  # noqa: E501
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}
COMPLETE_REASONING_WITH_THINK = {
    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
    "reasoning": "This is a reasoning section",
    "content": None,
}
MULTIPLE_LINES_WITH_THINK = {
    "output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
}
# (streaming?, case) pairs: every case runs in both non-streaming and
# streaming mode.
TEST_CASES = [
    pytest.param(
        False,
        SIMPLE_REASONING,
        id="simple_reasoning",
    ),
    pytest.param(
        False,
        COMPLETE_REASONING,
        id="complete_reasoning",
    ),
    pytest.param(
        False,
        NO_REASONING,
        id="no_reasoning",
    ),
    pytest.param(
        False,
        MULTIPLE_LINES,
        id="multiple_lines",
    ),
    pytest.param(
        False,
        REASONING_WITH_THINK,
        id="reasoning_with_think",
    ),
    pytest.param(
        False,
        COMPLETE_REASONING_WITH_THINK,
        id="complete_reasoning_with_think",
    ),
    pytest.param(
        False,
        MULTIPLE_LINES_WITH_THINK,
        id="multiple_lines_with_think",
    ),
    pytest.param(
        True,
        SIMPLE_REASONING,
        id="simple_reasoning_streaming",
    ),
    pytest.param(
        True,
        COMPLETE_REASONING,
        id="complete_reasoning_streaming",
    ),
    pytest.param(
        True,
        NO_REASONING,
        id="no_reasoning_streaming",
    ),
    pytest.param(
        True,
        MULTIPLE_LINES,
        id="multiple_lines_streaming",
    ),
    pytest.param(
        True,
        REASONING_WITH_THINK,
        id="reasoning_with_think_streaming",
    ),
    pytest.param(
        True,
        COMPLETE_REASONING_WITH_THINK,
        id="complete_reasoning_with_think_streaming",
    ),
    pytest.param(
        True,
        MULTIPLE_LINES_WITH_THINK,
        id="multiple_lines_with_think_streaming",
    ),
]
# Global tokenizer initialization to avoid repeated loading
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
):
    """Extract reasoning/content with the granite parser and compare."""
    # Re-decode each token individually so streaming mode receives one
    # string chunk per token.
    pieces: list[str] = []
    for token in tokenizer.tokenize(param_dict["output"]):
        pieces.append(tokenizer.convert_tokens_to_string([token]))
    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(tokenizer)
    reasoning, content = run_reasoning_extraction(parser, pieces, streaming=streaming)
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
# Additional tests for verifying the correctness of granite streaming; this
# is complicated because granite uses multiple tokens to indicate when thinking
# is starting / when it's starting its response, so skipping special tokens
# is awkward.
### Handling the start of reasoning
# "Here" may be the start of the reasoning sentinel, so nothing is emitted yet.
STREAMING_1 = {
    "previous_text": None,
    "current_text": "Here",
    "delta_text": "Here",
    "reasoning": None,
    "content": None,
}
# When we fail, we should give what was previously being silenced first
STREAMING_2 = {
    "previous_text": "Here is my thought",
    "current_text": "Here is my thought failure",
    "delta_text": " failure",
    "reasoning": None,
    "content": "Here is my thought failure",
}
# But then after the first one, we should only add the delta text to content
STREAMING_3 = {
    "previous_text": "Here wrong",
    "current_text": " words",
    "delta_text": " Here wrong words",
    "reasoning": None,
    "content": " words",
}
# But then after the first one, we should only add the delta text to content
STREAMING_4 = {
    "previous_text": "Here is my thought",
    "current_text": "Here is my thought process:",
    "delta_text": " process:",
    "reasoning": None,
    "content": None,
}
# Reasoning started successfully; parse reasoning content
STREAMING_5 = {
    "previous_text": "Here is my thought process:",
    "current_text": "Here is my thought process: foo",
    "delta_text": " foo",
    "reasoning": " foo",
    "content": None,
}
# Response special sequence has started, but not finished.
STREAMING_6 = {
    "previous_text": "Here is my thought process: foo",
    "current_text": "Here is my thought process: foo Here is",
    "delta_text": " Here is",
    "reasoning": " ",
    "content": None,
}
# Response special sequence started, but was broken; the reasoning
# content should be the content that was previously unused.
STREAMING_7 = {
    "previous_text": "Here is my thought process: foo Here is",
    "current_text": "Here is my thought process: foo Here is Here",
    "delta_text": " Here",
    "reasoning": "Here is ",
    "content": None,
}
# Response special sequence is ongoing
STREAMING_8 = {
    "previous_text": "Here is my thought process: foo Here is my response:",
    "current_text": "Here is my thought process: foo Here is my response: bar",
    "delta_text": " bar",
    "reasoning": None,
    "content": " bar",
}
# The delta text has everything; we should be able to correctly parse both
STREAMING_9 = {
    "previous_text": None,
    "current_text": "Here is my thought process: foo Here is my response: bar",
    "delta_text": "Here is my thought process: foo Here is my response: bar",
    "reasoning": " foo ",
    "content": " bar",
}
## The Response is ongoing, and the delta mixes reasoning content / content
STREAMING_10 = {
    "previous_text": "Here is my thought process: foo",
    "current_text": "Here is my thought process: foo bar Here is my response: baz",
    "delta_text": " bar Here is my response: baz",
    "reasoning": " bar ",
    "content": " baz",
}
# The delta text starts a new substring that might be a response special seq
STREAMING_11 = {
    "previous_text": "Here is my thought process: This is a reasoning section ",
    "current_text": "Here is my thought process: This is a reasoning section Here",
    "delta_text": "Here",
    "reasoning": None,
    "content": None,
}
# The delta text is finishing the response special seq
STREAMING_12 = {
    "previous_text": "Here is my thought process: foo Here is my response",
    "current_text": "Here is my thought process: foo Here is my response:",
    "delta_text": ":",
    "reasoning": None,
    "content": None,
}
# The delta breaks a partially-matched response sentinel; the held-back
# "Here" is flushed back into reasoning together with the delta.
STREAMING_13 = {
    "previous_text": "Here is my thought process: foo Here",
    "current_text": "Here is my thought process: foo Here was",
    "delta_text": " was",
    "reasoning": "Here was",
    "content": None,
}
# One pytest.param per single streaming step scenario defined above.
STREAMING_SUBCASES = [
    pytest.param(
        STREAMING_1,
        id="Starting reasoning special sequence",
    ),
    pytest.param(
        STREAMING_2,
        id="Unexpected start reasoning sequence",
    ),
    pytest.param(
        STREAMING_3,
        id="Continuing unexpected start reasoning sequence",
    ),
    pytest.param(
        STREAMING_4,
        id="Only start reasoning sequence and nothing else",
    ),
    pytest.param(
        STREAMING_5,
        id="Reasoning content has started",
    ),
    pytest.param(
        STREAMING_6,
        id="Response special sequence has started",
    ),
    pytest.param(
        STREAMING_7,
        id="Response special sequence reset",
    ),
    pytest.param(
        STREAMING_8,
        id="Response text has started",
    ),
    pytest.param(
        STREAMING_9,
        id="Delta contains everything",
    ),
    pytest.param(
        STREAMING_10,
        id="Delta contains some reasoning and response",
    ),
    pytest.param(
        STREAMING_11,
        id="Delta starts response sequence",
    ),
    pytest.param(
        STREAMING_12,
        id="Delta finishes response sequence",
    ),
    pytest.param(
        # Typo fixed in the test id: "responise" -> "response".
        STREAMING_13,
        id="Delta breaks potential response sequence",
    ),
]
@pytest.mark.parametrize("param_dict", STREAMING_SUBCASES)
def test_streaming_subcases(param_dict):
    """Exercise one streaming step of the granite reasoning parser."""
    previous_text = param_dict["previous_text"]
    current_text = param_dict["current_text"]
    delta_text = param_dict["delta_text"]
    # Encode each text chunk to token ids; an absent previous text maps to
    # an empty id list.
    previous_token_ids = [] if previous_text is None else tokenizer.encode(previous_text)
    current_token_ids = tokenizer.encode(current_text)
    delta_token_ids = tokenizer.encode(delta_text)
    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(tokenizer)
    response = parser.extract_reasoning_streaming(
        previous_text=previous_text,
        current_text=current_text,
        delta_text=delta_text,
        previous_token_ids=previous_token_ids,
        current_token_ids=current_token_ids,
        delta_token_ids=delta_token_ids,
    )
    expected_reasoning = param_dict["reasoning"]
    expected_content = param_dict["content"]
    # Streaming currently expects at least one of reasoning content / content,
    # so the response should return None in that case.
    if expected_reasoning is None and expected_content is None:
        assert response is None
        return
    assert isinstance(response, DeltaMessage)
    assert response.reasoning == expected_reasoning
    assert response.content == expected_content

View File

@@ -0,0 +1,188 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.reasoning.deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser
from vllm.reasoning.holo2_reasoning_parser import Holo2ReasoningParser
from vllm.reasoning.identity_reasoning_parser import IdentityReasoningParser
# Holo2 model whose tokenizer supplies the </think> token used below.
REASONING_MODEL_NAME = "HCompany/Holo2-4B"


@pytest.fixture(scope="module")
def tokenizer():
    # Module-scoped so the tokenizer is downloaded/loaded only once.
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
@pytest.mark.parametrize(
    "thinking,expected_parser_type",
    [
        (True, DeepSeekR1ReasoningParser),
        (False, IdentityReasoningParser),
    ],
)
def test_parser_selection(tokenizer, thinking, expected_parser_type):
    """Holo2 delegates to a parser chosen by the `thinking` template kwarg."""
    kwargs = {"thinking": thinking}
    parser = Holo2ReasoningParser(tokenizer, chat_template_kwargs=kwargs)
    delegate = parser._parser
    assert isinstance(delegate, expected_parser_type)
def test_holo2_default_parser_is_deepseekr1(tokenizer):
    """Without chat_template_kwargs, Holo2 delegates to the DeepSeek-R1 parser."""
    default_delegate = Holo2ReasoningParser(tokenizer)._parser
    assert isinstance(default_delegate, DeepSeekR1ReasoningParser)
def test_holo2_supports_structured_output(tokenizer):
    """is_reasoning_end must work without chat_template_kwargs.

    The structured-output manager instantiates the reasoning parser without
    chat_template_kwargs and relies on is_reasoning_end to decide when the
    reasoning section has finished before applying the grammar, so that exact
    construction path is exercised here.
    """
    parser = Holo2ReasoningParser(tokenizer)
    end_token_id = tokenizer.encode("</think>", add_special_tokens=False)[0]
    prefix = [1, 2, 4]
    assert not parser.is_reasoning_end(prefix)
    assert parser.is_reasoning_end(prefix + [end_token_id])
    assert parser.is_reasoning_end(prefix + [end_token_id, 5])
# thinking is True, non-streaming
# thinking is True, non-streaming
WITH_THINK = {
    "output": "This is a reasoning section</think>This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}
# thinking is True, streaming
WITH_THINK_STREAM = {
    "output": "This is a reasoning section</think>This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}
# thinking is False, non-streaming
THINKING_DISABLED = {
    "output": "This is the rest",
    "reasoning": None,
    "content": "This is the rest",
}
# thinking is False, streaming
THINKING_DISABLED_STREAM = {
    "output": "This is the rest",
    "reasoning": None,
    "content": "This is the rest",
}
# thinking is False but the model output </think>, non-streaming
THINKING_DISABLED_WITH_CLOSE_TAG = {
    "output": "</think>This is the rest",
    "reasoning": None,
    "content": "</think>This is the rest",
}
# thinking is False but the model output </think>, streaming
THINKING_DISABLED_WITH_CLOSE_TAG_STREAM = {
    "output": "some text</think>This is the rest",
    "reasoning": None,
    "content": "some text</think>This is the rest",
}
# Output ends right at </think>; no content follows.
COMPLETE_REASONING = {
    "output": "This is a reasoning section</think>",
    "reasoning": "This is a reasoning section",
    "content": None,
}
# (streaming?, case, chat_template_kwargs) triples; None kwargs exercises
# the default parser selection.
TEST_CASES = [
    pytest.param(
        False,
        WITH_THINK,
        None,
        id="with_think",
    ),
    pytest.param(
        True,
        WITH_THINK_STREAM,
        None,
        id="with_think_stream",
    ),
    pytest.param(
        False,
        WITH_THINK,
        {"thinking": True},
        id="with_think_enabled",
    ),
    pytest.param(
        True,
        WITH_THINK_STREAM,
        {"thinking": True},
        id="with_think_stream_enabled",
    ),
    pytest.param(
        False,
        THINKING_DISABLED,
        {"thinking": False},
        id="thinking_disabled",
    ),
    pytest.param(
        True,
        THINKING_DISABLED_STREAM,
        {"thinking": False},
        id="thinking_disabled_stream",
    ),
    pytest.param(
        False,
        THINKING_DISABLED_WITH_CLOSE_TAG,
        {"thinking": False},
        id="thinking_disabled_with_close_tag",
    ),
    pytest.param(
        True,
        THINKING_DISABLED_WITH_CLOSE_TAG_STREAM,
        {"thinking": False},
        id="thinking_disabled_with_close_tag_stream",
    ),
    pytest.param(
        False,
        COMPLETE_REASONING,
        None,
        id="complete_reasoning",
    ),
    pytest.param(
        True,
        COMPLETE_REASONING,
        None,
        id="complete_reasoning_stream",
    ),
]
@pytest.mark.parametrize("streaming, param_dict, chat_template_kwargs", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    chat_template_kwargs: dict | None,
    tokenizer,
):
    """Extract reasoning/content with the holo2 parser and compare."""
    # One decoded string per token so the streaming path sees per-token chunks.
    pieces: list[str] = []
    for token in tokenizer.tokenize(param_dict["output"]):
        pieces.append(tokenizer.convert_tokens_to_string([token]))
    parser_cls = ReasoningParserManager.get_reasoning_parser("holo2")
    parser: ReasoningParser = parser_cls(
        tokenizer,
        chat_template_kwargs=chat_template_kwargs,
    )
    reasoning, content = run_reasoning_extraction(parser, pieces, streaming=streaming)
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]

View File

@@ -0,0 +1,168 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.reasoning import ReasoningParser, ReasoningParserManager
# Registry name of the parser under test.
parser_name = "hunyuan_a13b"
# Hunyuan wraps reasoning in <think>...</think> and the answer in <answer>.
START_REASONING = "<think>\n"
START_RESPONSE = "\n</think>\n<answer>\n"
END_RESPONSE = "\n</answer>"
# Case: empty reasoning section, straight to the answer.
# NOTE(review): identifier misspells "THOUGHT"; kept as-is because the
# test-case table below references it by this name.
NO_REASONING_QUICK_THROUGHT = {
    "output": f"{START_REASONING}{START_RESPONSE}This is the rest{END_RESPONSE}",  # noqa: E501
    "reasoning": None,
    "content": "This is the rest",
}
# Case: reasoning then a fully-delimited answer.
SIMPLE_REASONING = {
    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest{END_RESPONSE}",  # noqa: E501
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}
# Case: output ends right after the response delimiter; no content.
COMPLETE_REASONING = {
    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
    "reasoning": "This is a reasoning section",
    "content": None,
}
# Case: punctuation at the end of the reasoning section survives.
COMPLETE_REASONING_WITH_SYMBOL = {
    "output": f"{START_REASONING}This is a reasoning section!{START_RESPONSE}",
    "reasoning": "This is a reasoning section!",
    "content": None,
}
# Case: no delimiters at all; everything is content.
NO_REASONING = {
    "output": "This is content",
    "reasoning": None,
    "content": "This is content",
}
# Case: newlines inside both sections survive extraction.
MULTIPLE_LINES = {
    "output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
}
# NOTE(review): the *_WITH_THINK cases below duplicate earlier cases
# (modulo the missing END_RESPONSE); kept to mirror other parser tests.
REASONING_WITH_THINK = {
    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}This is the rest",  # noqa: E501
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}
COMPLETE_REASONING_WITH_THINK = {
    "output": f"{START_REASONING}This is a reasoning section{START_RESPONSE}",
    "reasoning": "This is a reasoning section",
    "content": None,
}
MULTIPLE_LINES_WITH_THINK = {
    "output": f"{START_REASONING}This\nThat{START_RESPONSE}This is the rest\nThat",
    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
}
# (streaming?, case) pairs: every case runs in both modes.
TEST_CASES = [
    pytest.param(
        False,
        SIMPLE_REASONING,
        id="simple_reasoning",
    ),
    pytest.param(
        False,
        COMPLETE_REASONING,
        id="complete_reasoning",
    ),
    pytest.param(
        False,
        COMPLETE_REASONING_WITH_SYMBOL,
        id="complete_reasoning_with_symbol",
    ),
    pytest.param(
        False,
        NO_REASONING,
        id="no_reasoning",
    ),
    pytest.param(False, NO_REASONING_QUICK_THROUGHT, id="no_reasoning_quick"),
    pytest.param(
        False,
        MULTIPLE_LINES,
        id="multiple_lines",
    ),
    pytest.param(
        False,
        REASONING_WITH_THINK,
        id="reasoning_with_think",
    ),
    pytest.param(
        False,
        COMPLETE_REASONING_WITH_THINK,
        id="complete_reasoning_with_think",
    ),
    pytest.param(
        False,
        MULTIPLE_LINES_WITH_THINK,
        id="multiple_lines_with_think",
    ),
    pytest.param(
        True,
        SIMPLE_REASONING,
        id="simple_reasoning_streaming",
    ),
    pytest.param(
        True,
        COMPLETE_REASONING,
        id="complete_reasoning_streaming",
    ),
    pytest.param(
        True,
        NO_REASONING,
        id="no_reasoning_streaming",
    ),
    pytest.param(True, NO_REASONING_QUICK_THROUGHT, id="no_reasoning_quick_stream"),
    pytest.param(
        True,
        MULTIPLE_LINES,
        id="multiple_lines_streaming",
    ),
    pytest.param(
        True,
        REASONING_WITH_THINK,
        id="reasoning_with_think_streaming",
    ),
    pytest.param(
        True,
        COMPLETE_REASONING_WITH_THINK,
        id="complete_reasoning_with_think_streaming",
    ),
    pytest.param(
        True,
        MULTIPLE_LINES_WITH_THINK,
        id="multiple_lines_with_think_streaming",
    ),
]
# Global tokenizer initialization to avoid repeated loading
tokenizer = AutoTokenizer.from_pretrained(
    "tencent/Hunyuan-A13B-Instruct", trust_remote_code=True
)
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
):
    """Extract reasoning/content with the hunyuan_a13b parser and compare."""
    # One decoded string per token so streaming receives per-token chunks.
    pieces: list[str] = []
    for token in tokenizer.tokenize(param_dict["output"]):
        pieces.append(tokenizer.convert_tokens_to_string([token]))
    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(tokenizer)
    reasoning, content = run_reasoning_extraction(parser, pieces, streaming=streaming)
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]

View File

@@ -0,0 +1,195 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.reasoning import ReasoningParser, ReasoningParserManager
# Registry name of the parser under test.
parser_name = "minimax_m2_append_think"
# MiniMax M2 emits only the closing think tag (see the behavior note below).
end_token = "</think>"
# MiniMax M2 model path
REASONING_MODEL_NAME = "MiniMaxAI/MiniMax-M2"


@pytest.fixture(scope="module")
def minimax_m2_tokenizer():
    # Module-scoped so the tokenizer is downloaded/loaded only once.
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
# =============================================================================
# MiniMaxM2AppendThinkReasoningParser behavior:
# - Prepends <think> to the beginning of the output
# - Does NOT separate reasoning and content
# - Returns everything as content (with <think> prepended)
# - reasoning is always None
#
# This parser is used when you want to keep the raw output with <think> added
# =============================================================================
# Case: simple output with end token
SIMPLE_OUTPUT = {
"output": "This is reasoning</think>This is response",
"reasoning": None,
"content": "<think>This is reasoning</think>This is response",
"is_reasoning_end": True,
}
# Case: output without end token (reasoning in progress)
NO_END_TOKEN = {
"output": "This is reasoning in progress",
"reasoning": None,
"content": "<think>This is reasoning in progress",
"is_reasoning_end": False,
}
# Case: only end token
ONLY_END_TOKEN = {
"output": "</think>This is response",
"reasoning": None,
"content": "<think></think>This is response",
"is_reasoning_end": True,
}
# Case: multiple lines
MULTIPLE_LINES = {
"output": "Line 1\nLine 2</think>Response 1\nResponse 2",
"reasoning": None,
"content": "<think>Line 1\nLine 2</think>Response 1\nResponse 2",
"is_reasoning_end": True,
}
# Case: empty output (non-streaming prepends <think>)
EMPTY = {
"output": "",
"reasoning": None,
"content": "<think>",
"is_reasoning_end": False,
}
# Case: empty output streaming (no tokens = no output)
EMPTY_STREAMING = {
"output": "",
"reasoning": None,
"content": None,
"is_reasoning_end": False,
}
# Case: special characters
SPECIAL_CHARS = {
"output": "Let me think... 1+1=2</think>Yes!",
"reasoning": None,
"content": "<think>Let me think... 1+1=2</think>Yes!",
"is_reasoning_end": True,
}
# Case: code in output
CODE_OUTPUT = {
"output": "```python\nprint('hi')\n```</think>Here's the code.",
"reasoning": None,
"content": "<think>```python\nprint('hi')\n```</think>Here's the code.",
"is_reasoning_end": True,
}
TEST_CASES = [
pytest.param(
False,
SIMPLE_OUTPUT,
id="simple_output",
),
pytest.param(
True,
SIMPLE_OUTPUT,
id="simple_output_streaming",
),
pytest.param(
False,
NO_END_TOKEN,
id="no_end_token",
),
pytest.param(
True,
NO_END_TOKEN,
id="no_end_token_streaming",
),
pytest.param(
False,
ONLY_END_TOKEN,
id="only_end_token",
),
pytest.param(
True,
ONLY_END_TOKEN,
id="only_end_token_streaming",
),
pytest.param(
False,
MULTIPLE_LINES,
id="multiple_lines",
),
pytest.param(
True,
MULTIPLE_LINES,
id="multiple_lines_streaming",
),
pytest.param(
False,
EMPTY,
id="empty",
),
pytest.param(
True,
EMPTY_STREAMING,
id="empty_streaming",
),
pytest.param(
False,
SPECIAL_CHARS,
id="special_chars",
),
pytest.param(
True,
SPECIAL_CHARS,
id="special_chars_streaming",
),
pytest.param(
False,
CODE_OUTPUT,
id="code_output",
),
pytest.param(
True,
CODE_OUTPUT,
id="code_output_streaming",
),
]
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    minimax_m2_tokenizer,
):
    """Check minimax_m2_append_think extraction and is_reasoning_end."""
    tokens = minimax_m2_tokenizer.tokenize(param_dict["output"])
    # One decoded string per token so streaming receives per-token chunks.
    pieces: list[str] = []
    for token in tokens:
        pieces.append(minimax_m2_tokenizer.convert_tokens_to_string([token]))
    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(minimax_m2_tokenizer)
    reasoning, content = run_reasoning_extraction(parser, pieces, streaming=streaming)
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
    # is_reasoning_end works on token ids, not on the decoded strings.
    token_ids = minimax_m2_tokenizer.convert_tokens_to_ids(tokens)
    assert parser.is_reasoning_end(token_ids) == param_dict["is_reasoning_end"]

View File

@@ -0,0 +1,230 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.reasoning import ReasoningParser, ReasoningParserManager
# Registry name of the parser under test.
parser_name = "minimax_m2"
# MiniMax M2 emits only the closing think tag (see the behavior note below).
end_token = "</think>"
# MiniMax M2 model path
REASONING_MODEL_NAME = "MiniMaxAI/MiniMax-M2"


@pytest.fixture(scope="module")
def minimax_m2_tokenizer():
    # Module-scoped so the tokenizer is downloaded/loaded only once.
    return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
# =============================================================================
# MiniMax M2 specific behavior:
# - Model does NOT generate <think> start token
# - Model only generates </think> end token
# - All content before </think> is reasoning
# - All content after </think> is the actual response (content)
# =============================================================================
# Case: reasoning + end token + content (typical case)
SIMPLE_REASONING = {
"output": "This is a reasoning section</think>This is the rest",
"reasoning": "This is a reasoning section",
"content": "This is the rest",
"is_reasoning_end": True,
}
# Case: reasoning + end token only (no content after)
COMPLETE_REASONING = {
"output": "This is a reasoning section</think>",
"reasoning": "This is a reasoning section",
"content": None,
"is_reasoning_end": True,
}
# Case: no end token yet (streaming in progress, all is reasoning)
NO_END_TOKEN = {
"output": "This is reasoning in progress",
"reasoning": "This is reasoning in progress",
"content": None,
"is_reasoning_end": False,
}
# Case: multiple lines of reasoning
MULTIPLE_LINES = {
"output": "First line\nSecond line</think>Response first line\nResponse second",
"reasoning": "First line\nSecond line",
"content": "Response first line\nResponse second",
"is_reasoning_end": True,
}
# Case: only end token (empty reasoning, immediate response)
SHORTEST_REASONING_NO_STREAMING = {
"output": "</think>This is the response",
"reasoning": "",
"content": "This is the response",
"is_reasoning_end": True,
}
# Case: only end token streaming (reasoning is None because it's just the token)
SHORTEST_REASONING_STREAMING = {
"output": "</think>This is the response",
"reasoning": None,
"content": "This is the response",
"is_reasoning_end": True,
}
# Case: empty output
EMPTY = {
"output": "",
"reasoning": "",
"content": None,
"is_reasoning_end": False,
}
# Case: empty streaming
EMPTY_STREAMING = {
"output": "",
"reasoning": None,
"content": None,
"is_reasoning_end": False,
}
# Case: long reasoning with special characters
SPECIAL_CHARS = {
"output": "Let me think... 1+1=2, right?</think>Yes, 1+1=2.",
"reasoning": "Let me think... 1+1=2, right?",
"content": "Yes, 1+1=2.",
"is_reasoning_end": True,
}
# Case: reasoning with code blocks
CODE_IN_REASONING = {
"output": "```python\nprint('hello')\n```</think>Here is the code.",
"reasoning": "```python\nprint('hello')\n```",
"content": "Here is the code.",
"is_reasoning_end": True,
}
TEST_CASES = [
# Core cases: no start token (MiniMax M2 actual behavior)
pytest.param(
False,
SIMPLE_REASONING,
id="simple_reasoning",
),
pytest.param(
True,
SIMPLE_REASONING,
id="simple_reasoning_streaming",
),
pytest.param(
False,
COMPLETE_REASONING,
id="complete_reasoning",
),
pytest.param(
True,
COMPLETE_REASONING,
id="complete_reasoning_streaming",
),
pytest.param(
False,
NO_END_TOKEN,
id="no_end_token",
),
pytest.param(
True,
NO_END_TOKEN,
id="no_end_token_streaming",
),
pytest.param(
False,
MULTIPLE_LINES,
id="multiple_lines",
),
pytest.param(
True,
MULTIPLE_LINES,
id="multiple_lines_streaming",
),
pytest.param(
False,
SHORTEST_REASONING_NO_STREAMING,
id="shortest_reasoning",
),
pytest.param(
True,
SHORTEST_REASONING_STREAMING,
id="shortest_reasoning_streaming",
),
pytest.param(
False,
EMPTY,
id="empty",
),
pytest.param(
True,
EMPTY_STREAMING,
id="empty_streaming",
),
pytest.param(
False,
SPECIAL_CHARS,
id="special_chars",
),
pytest.param(
True,
SPECIAL_CHARS,
id="special_chars_streaming",
),
pytest.param(
False,
CODE_IN_REASONING,
id="code_in_reasoning",
),
pytest.param(
True,
CODE_IN_REASONING,
id="code_in_reasoning_streaming",
),
]
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    minimax_m2_tokenizer,
):
    """End-to-end check of the MiniMax-M2 reasoning parser.

    Verifies reasoning/content extraction (streaming and non-streaming),
    is_reasoning_end, and extract_content_ids for each test case.
    """
    output = minimax_m2_tokenizer.tokenize(param_dict["output"])
    # decode everything to tokens
    output_tokens: list[str] = [
        minimax_m2_tokenizer.convert_tokens_to_string([token]) for token in output
    ]
    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        minimax_m2_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, output_tokens, streaming=streaming
    )
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
    # Test is_reasoning_end
    output_ids = minimax_m2_tokenizer.convert_tokens_to_ids(output)
    is_reasoning_end = parser.is_reasoning_end(output_ids)
    assert is_reasoning_end == param_dict["is_reasoning_end"]
    # Test extract_content_ids. Use a dedicated name so the extraction result
    # checked above is not clobbered, and always pass token *ids*: the
    # original code passed the token-string list `output` in the
    # empty-content branch, so that branch never exercised the real id path.
    content_ids = parser.extract_content_ids(output_ids)
    if param_dict["content"] is not None:
        assert content_ids == minimax_m2_tokenizer.convert_tokens_to_ids(
            minimax_m2_tokenizer.tokenize(param_dict["content"])
        )
    else:
        assert content_ids == []

View File

@@ -0,0 +1,348 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from tests.reasoning.utils import run_reasoning_extraction_mistral
from vllm.reasoning import ReasoningParser, ReasoningParserManager
from vllm.tokenizers.mistral import MistralTokenizer
# Registry name of the parser under test.
parser_name = "mistral"


@pytest.fixture(scope="module")
def mistral_tokenizer():
    """Module-scoped Magistral tokenizer shared by every mistral parser test."""
    return MistralTokenizer.from_pretrained("mistralai/Magistral-Small-2509")
# "INVALID" cases: the output contains [/THINK] without a preceding [THINK],
# so no reasoning is extracted and the tag is dropped from the content.
INVALID_SIMPLE_REASONING = {
    "output": "This is a reasoning section[/THINK]This is the rest",
    "reasoning": None,
    "content": "This is a reasoning sectionThis is the rest",
    "is_reasoning_end": False,
}
INVALID_COMPLETE_REASONING = {
    "output": "This is a reasoning section[/THINK]",
    "reasoning": None,
    "content": "This is a reasoning section",
    "is_reasoning_end": False,
}
# Opened [THINK] with no closing tag: everything after it is reasoning.
NO_CONTENT = {
    "output": "[THINK]This is reasoning",
    "reasoning": "This is reasoning",
    "content": None,
    "is_reasoning_end": False,
}
# No tags at all: plain content.
NO_REASONING = {
    "output": "This is content",
    "reasoning": None,
    "content": "This is content",
    "is_reasoning_end": False,
}
NO_REASONING_STREAMING = {
    "output": "This is a reasoning section",
    "reasoning": None,
    "content": "This is a reasoning section",
    "is_reasoning_end": False,
}
INVALID_MULTIPLE_LINES = {
    "output": "This\nThat[/THINK]This is the rest\nThat",
    "reasoning": None,
    "content": "This\nThatThis is the rest\nThat",
    "is_reasoning_end": False,
}
INVALID_SHORTEST_REASONING_NO_STREAMING = {
    "output": "[/THINK]This is the rest",
    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": False,
}
INVALID_SHORTEST_REASONING = {
    "output": "[/THINK]This is the rest",
    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": False,
}
# Well-formed [THINK]...[/THINK] pairs: reasoning and content both extracted.
REASONING_WITH_THINK = {
    "output": "[THINK]This is a reasoning section[/THINK]This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
COMPLETE_REASONING_WITH_THINK = {
    "output": "[THINK]This is a reasoning section[/THINK]",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": True,
}
MULTIPLE_LINES_WITH_THINK = {
    "output": "[THINK]This\nThat[/THINK]This is the rest\nThat",
    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
    "is_reasoning_end": True,
}
INVALID_SHORTEST_REASONING_NO_STREAMING_WITH_THINK = {
    "output": "[/THINK]This is the rest",
    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": False,
}
INVALID_SHORTEST_REASONING_WITH_THINK = {
    "output": "[/THINK]This is the rest",
    "reasoning": None,
    "content": "This is the rest",
    "is_reasoning_end": False,
}
THINK_NO_END = {
    "output": "[THINK]This is a reasoning section",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": False,
}
EMPTY = {
    "output": "",
    "reasoning": None,
    "content": "",
    "is_reasoning_end": False,
}
EMPTY_STREAMING = {
    "output": "",
    "reasoning": None,
    "content": None,
    "is_reasoning_end": False,
}
# Text can precede [THINK]; that prefix is joined with the post-[/THINK] text.
NEW_LINE = {
    "output": "Before\n[THINK]This is a reasoning section[/THINK]\nThis is the rest",
    "reasoning": "This is a reasoning section",
    "content": "Before\n\nThis is the rest",
    "is_reasoning_end": True,
}
NEW_LINE_STREAMING = {
    "output": "Before\n[THINK]This is a reasoning section[/THINK]\nThis is the rest",
    "reasoning": "This is a reasoning section",
    "content": "Before\n\nThis is the rest",
    "is_reasoning_end": True,
}
# (streaming?, case) pairs consumed by test_mistral_reasoning.
TEST_CASES = [
    pytest.param(
        False,
        INVALID_SIMPLE_REASONING,
        id="invalid_simple_reasoning",
    ),
    pytest.param(
        True,
        INVALID_SIMPLE_REASONING,
        id="invalid_simple_reasoning_streaming",
    ),
    pytest.param(
        False,
        INVALID_COMPLETE_REASONING,
        id="invalid_complete_reasoning",
    ),
    pytest.param(
        True,
        INVALID_COMPLETE_REASONING,
        id="invalid_complete_reasoning_streaming",
    ),
    pytest.param(
        False,
        NO_CONTENT,
        id="no_content",
    ),
    pytest.param(
        False,
        NO_REASONING,
        id="no_reasoning",
    ),
    pytest.param(
        True,
        NO_REASONING_STREAMING,
        id="no_reasoning_token_streaming",
    ),
    pytest.param(
        False,
        INVALID_MULTIPLE_LINES,
        id="invalid_multiple_lines",
    ),
    pytest.param(
        True,
        INVALID_MULTIPLE_LINES,
        id="invalid_multiple_lines_streaming",
    ),
    pytest.param(
        True,
        INVALID_SHORTEST_REASONING,
        id="invalid_shortest",
    ),
    pytest.param(
        False,
        INVALID_SHORTEST_REASONING_NO_STREAMING,
        id="invalid_shortest_streaming",
    ),
    pytest.param(
        False,
        REASONING_WITH_THINK,
        id="reasoning_with_think",
    ),
    pytest.param(
        True,
        REASONING_WITH_THINK,
        id="reasoning_with_think_streaming",
    ),
    pytest.param(
        False,
        COMPLETE_REASONING_WITH_THINK,
        id="complete_reasoning_with_think",
    ),
    pytest.param(
        True,
        COMPLETE_REASONING_WITH_THINK,
        id="complete_reasoning_with_think_streaming",
    ),
    pytest.param(
        False,
        MULTIPLE_LINES_WITH_THINK,
        id="multiple_lines_with_think",
    ),
    pytest.param(
        True,
        MULTIPLE_LINES_WITH_THINK,
        id="multiple_lines_with_think_streaming",
    ),
    pytest.param(
        False,
        INVALID_SHORTEST_REASONING_NO_STREAMING_WITH_THINK,
        id="invalid_shortest_with_think",
    ),
    pytest.param(
        True,
        INVALID_SHORTEST_REASONING_WITH_THINK,
        id="invalid_shortest_with_think_streaming",
    ),
    pytest.param(
        False,
        THINK_NO_END,
        id="think_no_end",
    ),
    pytest.param(
        True,
        THINK_NO_END,
        id="think_no_end_streaming",
    ),
    pytest.param(
        False,
        EMPTY,
        id="empty",
    ),
    pytest.param(
        True,
        EMPTY_STREAMING,
        id="empty_streaming",
    ),
    pytest.param(
        False,
        NEW_LINE,
        id="new_line",
    ),
    pytest.param(
        True,
        NEW_LINE_STREAMING,
        id="new_line_streaming",
    ),
]
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_mistral_reasoning(
    streaming: bool,
    param_dict: dict,
    mistral_tokenizer: MistralTokenizer,
):
    """End-to-end check of the Mistral reasoning parser.

    The case string in ``param_dict["output"]`` is re-encoded into token
    ids, replacing the literal ``[THINK]``/``[/THINK]`` markers with the
    tokenizer's special BEGIN_THINK/END_THINK ids. The ids are then run
    through the parser (streaming or non-streaming) and the resulting
    (reasoning, content) split, ``is_reasoning_end`` flag, and
    ``extract_content_ids`` result are compared to the expectations.
    """
    output = param_dict["output"]
    index_think = output.find("[THINK]")
    len_think = len("[THINK]")
    index_end_think = output.find("[/THINK]")
    len_end_think = len("[/THINK]")
    # encode everything to tokens ids
    output_tokens = []
    if index_think != -1:
        # Text before [THINK] is encoded verbatim, then the special id.
        output_before_think = output[:index_think]
        output_tokens += mistral_tokenizer.tokenizer.encode(
            output_before_think, False, False
        )
        output_tokens += [mistral_tokenizer.instruct.BEGIN_THINK]
        if index_end_think != -1:
            # Both markers present: middle text, END_THINK id, then tail.
            output_middle = output[index_think + len_think : index_end_think]
            output_after_think = output[index_end_think + len_end_think :]
            output_tokens += mistral_tokenizer.tokenizer.encode(
                output_middle, False, False
            )
            output_tokens += [mistral_tokenizer.instruct.END_THINK]
            output_tokens += mistral_tokenizer.tokenizer.encode(
                output_after_think, False, False
            )
        else:
            # Unterminated thinking: everything after [THINK] is encoded as-is.
            output_middle = output[index_think + len_think :]
            output_tokens += mistral_tokenizer.tokenizer.encode(
                output_middle, False, False
            )
    elif index_end_think != -1:
        # Only [/THINK]: text before it, END_THINK id, then text after it.
        output_before_think = output[:index_end_think]
        output_after_think = output[index_end_think + len_end_think :]
        output_tokens += mistral_tokenizer.tokenizer.encode(
            output_before_think, False, False
        )
        output_tokens += [mistral_tokenizer.instruct.END_THINK]
        output_tokens += mistral_tokenizer.tokenizer.encode(
            output_after_think, False, False
        )
    else:
        # No markers at all: encode the whole string unchanged.
        output_tokens += mistral_tokenizer.tokenizer.encode(output, False, False)
    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        mistral_tokenizer
    )
    reasoning, content = run_reasoning_extraction_mistral(
        parser, output_tokens, streaming=streaming
    )
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]
    # Test is_reasoning_end
    is_reasoning_end = parser.is_reasoning_end(output_tokens)
    assert is_reasoning_end == param_dict["is_reasoning_end"]
    # Test extract_content_ids
    if param_dict["content"] is not None:
        # Handle the case where there are tokens outputted before Thinking.
        # This should not occur if the model is well trained and prompted.
        if "[THINK]" in param_dict["output"] and not param_dict["output"].startswith(
            "[THINK]"
        ):
            before_content = param_dict["output"].split("[THINK]")[0]
            before_token_ids = mistral_tokenizer.tokenizer.encode(
                before_content, bos=False, eos=False
            )
            left_to_encode = param_dict["content"][len(before_content) :]
        # Normal situation.
        else:
            before_token_ids = []
            left_to_encode = param_dict["content"]
        content_tokens = parser.extract_content_ids(output_tokens)
        expected_token_ids = before_token_ids + mistral_tokenizer.tokenizer.encode(
            left_to_encode, bos=False, eos=False
        )
        assert content_tokens == expected_token_ids
    else:
        content = parser.extract_content_ids(output_tokens)
        assert content == []

View File

@@ -0,0 +1,152 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.reasoning import ReasoningParser, ReasoningParserManager
# Parser under test and the reasoning delimiters it recognizes.
parser_name = "olmo3"
START_REASONING = "<think>"
END_REASONING = "</think>"
# Case fixtures: each maps a raw model output to the expected
# (reasoning, content) split. reasoning=None means no reasoning text.
NO_REASONING = {
    "output": f"{START_REASONING}{END_REASONING}No thoughts, head empty!",
    "reasoning": None,
    "content": "No thoughts, head empty!",
}
NO_REASONING_WITH_NEWLINE = {
    "output": f"{START_REASONING}\n{END_REASONING}\n\nNo thoughts, head empty!",
    "reasoning": "\n",
    "content": "\n\nNo thoughts, head empty!",
}
SIMPLE_REASONING = {
    "output": f"{START_REASONING}This is a reasoning section{END_REASONING}This is the rest",  # noqa: E501
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}
SIMPLE_REASONING_WITH_NEWLINE = {
    "output": f"{START_REASONING} Look!\n\nI'm thinking...{END_REASONING}\nThis is the rest",  # noqa: E501
    "reasoning": " Look!\n\nI'm thinking...",
    "content": "\nThis is the rest",
}
# Whitespace inside and around the tags must be preserved verbatim.
SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES = {
    "output": f"{START_REASONING}\nLook!\nI'm thinking...\n\n{END_REASONING}\n\n\nThis is the rest",  # noqa: E501
    "reasoning": "\nLook!\nI'm thinking...\n\n",
    "content": "\n\n\nThis is the rest",
}
# A bare end tag with no start tag still terminates the reasoning section.
NO_REASONING_ONLY_END_THINK = {
    "output": f"{END_REASONING}\n\nNo thoughts, head empty!",
    "reasoning": None,
    "content": "\n\nNo thoughts, head empty!",
}
REASONING_ONLY_END_THINK = {
    "output": f"The user is asking me not to think.{END_REASONING}No thoughts!",
    "reasoning": "The user is asking me not to think.",
    "content": "No thoughts!",
}
# (streaming, case) pairs; ids ending in "_streaming" run the parser
# delta-by-delta instead of on the full string.
TEST_CASES = [
    pytest.param(
        False,  # not streaming
        NO_REASONING,
        id="no_reasoning",
    ),
    pytest.param(
        False,  # not streaming
        NO_REASONING_WITH_NEWLINE,
        id="no_reasoning_with_newline",
    ),
    pytest.param(
        False,  # not streaming
        SIMPLE_REASONING,
        id="simple_reasoning",
    ),
    pytest.param(
        False,  # not streaming
        SIMPLE_REASONING_WITH_NEWLINE,
        id="simple_reasoning_with_newline",
    ),
    pytest.param(
        True,  # enable streaming
        SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES,
        id="simple_reasoning_with_multiple_newlines",
    ),
    pytest.param(
        False,  # not streaming
        NO_REASONING_ONLY_END_THINK,
        id="no_reasoning_only_end_think",
    ),
    pytest.param(
        False,  # not streaming
        REASONING_ONLY_END_THINK,
        id="yes_reasoning_only_end_think",
    ),
    pytest.param(
        True,  # enable streaming
        NO_REASONING,
        id="no_reasoning_streaming",
    ),
    pytest.param(
        True,  # enable streaming
        NO_REASONING_WITH_NEWLINE,
        id="no_reasoning_with_newline_streaming",
    ),
    pytest.param(
        True,  # enable streaming
        SIMPLE_REASONING,
        id="simple_reasoning_streaming",
    ),
    pytest.param(
        True,  # enable streaming
        SIMPLE_REASONING_WITH_NEWLINE,
        id="simple_reasoning_with_newline_streaming",
    ),
    pytest.param(
        True,  # enable streaming
        SIMPLE_REASONING_WITH_MULTIPLE_NEWLINES,
        id="simple_reasoning_with_multiple_newlines_streaming",
    ),
    pytest.param(
        True,  # enable streaming
        NO_REASONING_ONLY_END_THINK,
        id="no_reasoning_only_end_think_streaming",
    ),
    pytest.param(
        True,  # enable streaming
        REASONING_ONLY_END_THINK,
        id="yes_reasoning_only_end_think_streaming",
    ),
]
# Global tokenizer initialization to avoid repeated loading.
# NOTE(review): this loads (and may download) the tokenizer at import
# time; a module-scoped fixture would defer the cost — confirm intent.
tokenizer = AutoTokenizer.from_pretrained("allenai/dolma2-tokenizer")
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict[str, str],
):
    """Check the olmo3 parser's reasoning/content split for one case."""
    # Re-detokenize piece by piece so the streaming path sees realistic
    # per-token deltas rather than one big string.
    pieces: list[str] = []
    for tok in tokenizer.tokenize(param_dict["output"]):
        pieces.append(tokenizer.convert_tokens_to_string([tok]))
    parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser(
        parser_name
    )(tokenizer)
    reasoning, content = run_reasoning_extraction(
        reasoning_parser=parser, model_output=pieces, streaming=streaming
    )
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]

View File

@@ -0,0 +1,142 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.reasoning import ReasoningParser, ReasoningParserManager
# Parser under test and the thinking delimiters it recognizes.
parser_name = "qwen3"
start_token = "<think>"
end_token = "</think>"
# Tokenizer source used to round-trip the case strings into deltas.
REASONING_MODEL_NAME = "Qwen/Qwen3-0.6B"
@pytest.fixture(scope="module")
def qwen3_tokenizer():
    """Load the Qwen3 tokenizer once for the whole test module."""
    tok = AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
    return tok
# With <think></think> tags, non-streaming
WITH_THINK = {
    "output": "<think>This is a reasoning section</think>This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}
# With <think></think> tags, streaming
WITH_THINK_STREAM = {
    "output": "<think>This is a reasoning section</think>This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
}
# Without <think></think> tags, non-streaming
WITHOUT_THINK = {
    "output": "This is the rest",
    "reasoning": None,
    "content": "This is the rest",
}
# Without <think></think> tags, streaming
WITHOUT_THINK_STREAM = {
    "output": "This is the rest",
    "reasoning": None,
    "content": "This is the rest",
}
# Closed reasoning block with nothing after it -> no content.
COMPLETE_REASONING = {
    "output": "<think>This is a reasoning section</think>",
    "reasoning": "This is a reasoning section",
    "content": None,
}
# Newlines inside reasoning and content must be preserved.
MULTILINE_REASONING = {
    "output": "<think>This is a reasoning\nsection</think>This is the rest\nThat",
    "reasoning": "This is a reasoning\nsection",
    "content": "This is the rest\nThat",
}
# Unclosed <think>: non-streaming treats the whole output as content...
ONLY_OPEN_TAG = {
    "output": "<think>This is a reasoning section",
    "reasoning": None,
    "content": "<think>This is a reasoning section",
}
# ...while streaming treats it as reasoning still in progress.
ONLY_OPEN_TAG_STREAM = {
    "output": "<think>This is a reasoning section",
    "reasoning": "This is a reasoning section",
    "content": None,
}
# (streaming, case) pairs; "_stream" ids feed the parser delta-by-delta.
TEST_CASES = [
    pytest.param(
        False,
        WITH_THINK,
        id="with_think",
    ),
    pytest.param(
        True,
        WITH_THINK_STREAM,
        id="with_think_stream",
    ),
    pytest.param(
        False,
        WITHOUT_THINK,
        id="without_think",
    ),
    pytest.param(
        True,
        WITHOUT_THINK_STREAM,
        id="without_think_stream",
    ),
    pytest.param(
        False,
        COMPLETE_REASONING,
        id="complete_reasoning",
    ),
    pytest.param(
        True,
        COMPLETE_REASONING,
        id="complete_reasoning_stream",
    ),
    pytest.param(
        False,
        MULTILINE_REASONING,
        id="multiline_reasoning",
    ),
    pytest.param(
        True,
        MULTILINE_REASONING,
        id="multiline_reasoning_stream",
    ),
    pytest.param(
        False,
        ONLY_OPEN_TAG,
        id="only_open_tag",
    ),
    pytest.param(
        True,
        ONLY_OPEN_TAG_STREAM,
        id="only_open_tag_stream",
    ),
]
@pytest.mark.parametrize("streaming, param_dict", TEST_CASES)
def test_reasoning(
    streaming: bool,
    param_dict: dict,
    qwen3_tokenizer,
):
    """Round-trip one case string through the qwen3 reasoning parser."""
    # Rebuild the text token-by-token so streaming sees realistic deltas.
    pieces: list[str] = []
    for tok in qwen3_tokenizer.tokenize(param_dict["output"]):
        pieces.append(qwen3_tokenizer.convert_tokens_to_string([tok]))
    parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name)
    parser: ReasoningParser = parser_cls(qwen3_tokenizer)
    reasoning, content = run_reasoning_extraction(
        parser, pieces, streaming=streaming
    )
    assert reasoning == param_dict["reasoning"]
    assert content == param_dict["content"]

View File

@@ -0,0 +1,236 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from typing import Any, cast
import pytest
from transformers import AutoTokenizer
from tests.reasoning.utils import run_reasoning_extraction
from vllm.reasoning import ReasoningParser, ReasoningParserManager
# Parser under test and the SeedOSS thinking delimiters.
parser_name = "seed_oss"
start_token = "<seed:think>"
end_token = "</seed:think>"
# Base tokenizer for the tests; the fixture below registers the SeedOSS
# tokens if the vocabulary does not already contain them.
REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
@pytest.fixture(scope="module")
def seedoss_tokenizer():
    """Tokenizer with the SeedOSS thinking tokens registered."""
    tok = AutoTokenizer.from_pretrained(REASONING_MODEL_NAME)
    # Only extend the vocabulary when the custom tokens are missing.
    if start_token not in tok.get_vocab():
        tok.add_tokens([start_token, end_token])
    return tok
# Case fixtures: raw model output plus the expected (reasoning, content)
# split and whether the parser should report the reasoning as finished.
SIMPLE_REASONING: dict[str, Any] = {
    "output": "This is a reasoning section</seed:think>This is the rest",
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
# Reasoning closed but nothing after the end token -> content is None.
COMPLETE_REASONING: dict[str, Any] = {
    "output": "This is a reasoning section</seed:think>",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": True,
}
# No end token at all: everything is treated as reasoning.
NO_CONTENT: dict[str, Any] = {
    "output": "This is content",
    "reasoning": "This is content",
    "content": None,
    "is_reasoning_end": False,
}
NO_REASONING_STREAMING: dict[str, Any] = {
    "output": "This is a reasoning section",
    "reasoning": "This is a reasoning section",
    "content": None,
    "is_reasoning_end": False,
}
# Newlines in reasoning and content must survive the split.
MULTIPLE_LINES: dict[str, Any] = {
    "output": "This\nThat</seed:think>This is the rest\nThat",
    "reasoning": "This\nThat",
    "content": "This is the rest\nThat",
    "is_reasoning_end": True,
}
# An explicit start token is stripped from the extracted reasoning.
WITH_START_TOKEN: dict[str, Any] = {
    "output": ("<seed:think>This is a reasoning section</seed:think>This is the rest"),
    "reasoning": "This is a reasoning section",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
# Only the end token (typical SeedOSS output shape).
ONLY_END_TOKEN: dict[str, Any] = {
    "output": "Some reasoning</seed:think>This is the rest",
    "reasoning": "Some reasoning",
    "content": "This is the rest",
    "is_reasoning_end": True,
}
NO_TOKENS: dict[str, Any] = {
    "output": "This is just content without any reasoning tokens",
    "reasoning": "This is just content without any reasoning tokens",
    "content": None,
    "is_reasoning_end": False,
}
def test_seedoss_reasoning_parser_creation(seedoss_tokenizer):
    """The seed_oss parser is registered and exposes the expected tokens."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    assert isinstance(parser, ReasoningParser)
    assert parser.start_token == start_token
    assert parser.end_token == end_token
@pytest.mark.parametrize("streaming", [True, False])
def test_simple_reasoning(seedoss_tokenizer, streaming):
    """Reasoning followed by content splits at the end token."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, SIMPLE_REASONING["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        SIMPLE_REASONING["reasoning"],
        SIMPLE_REASONING["content"],
    )
@pytest.mark.parametrize("streaming", [True, False])
def test_complete_reasoning(seedoss_tokenizer, streaming):
    """Nothing after the end token yields content=None."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, COMPLETE_REASONING["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        COMPLETE_REASONING["reasoning"],
        COMPLETE_REASONING["content"],
    )
@pytest.mark.parametrize("streaming", [True, False])
def test_no_content(seedoss_tokenizer, streaming):
    """Without an end token the whole output counts as reasoning."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, NO_CONTENT["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        NO_CONTENT["reasoning"],
        NO_CONTENT["content"],
    )
@pytest.mark.parametrize("streaming", [True, False])
def test_multiple_lines(seedoss_tokenizer, streaming):
    """Multiline reasoning and content are preserved verbatim."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, MULTIPLE_LINES["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        MULTIPLE_LINES["reasoning"],
        MULTIPLE_LINES["content"],
    )
@pytest.mark.parametrize("streaming", [True, False])
def test_with_start_token(seedoss_tokenizer, streaming):
    """An explicit start token is stripped from the extracted reasoning."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, WITH_START_TOKEN["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        WITH_START_TOKEN["reasoning"],
        WITH_START_TOKEN["content"],
    )
@pytest.mark.parametrize("streaming", [True, False])
def test_only_end_token(seedoss_tokenizer, streaming):
    """Only the end token is present (typical SeedOSS output shape)."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, ONLY_END_TOKEN["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        ONLY_END_TOKEN["reasoning"],
        ONLY_END_TOKEN["content"],
    )
@pytest.mark.parametrize("streaming", [True, False])
def test_no_tokens(seedoss_tokenizer, streaming):
    """With no special tokens, the whole output is treated as reasoning."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    reasoning, content = run_reasoning_extraction(
        parser, [cast(str, NO_TOKENS["output"])], streaming=streaming
    )
    assert (reasoning, content) == (
        NO_TOKENS["reasoning"],
        NO_TOKENS["content"],
    )
def test_is_reasoning_end(seedoss_tokenizer):
    """is_reasoning_end is True iff the end token id occurs in the ids."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    end_id = parser.end_token_id
    # End token anywhere in the sequence -> reasoning has finished.
    assert parser.is_reasoning_end([1, 2, end_id, 4]) is True
    # No end token -> still reasoning.
    assert parser.is_reasoning_end([1, 2, 3, 4]) is False
def test_extract_content_ids(seedoss_tokenizer):
    """extract_content_ids returns only the ids after the end token."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    end_id = parser.end_token_id
    # End token mid-sequence: the tail is content.
    assert parser.extract_content_ids([1, 2, end_id, 4, 5]) == [4, 5]
    # End token last: nothing follows it.
    assert parser.extract_content_ids([1, 2, 3, end_id]) == []
    # No end token: no content at all.
    assert parser.extract_content_ids([1, 2, 3, 4]) == []
def test_streaming_delta_processing(seedoss_tokenizer):
    """Small streamed deltas accumulate into the right reasoning/content."""
    parser = ReasoningParserManager.get_reasoning_parser(parser_name)(
        seedoss_tokenizer
    )
    chunks = ["Some ", "reasoning ", "content", "</seed:think>", "Final ", "answer"]
    reasoning, content = run_reasoning_extraction(parser, chunks, streaming=True)
    assert (reasoning, content) == ("Some reasoning content", "Final answer")

160
tests/reasoning/utils.py Normal file
View File

@@ -0,0 +1,160 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from vllm.entrypoints.openai.protocol import ChatCompletionRequest, DeltaMessage
from vllm.reasoning import ReasoningParser
from vllm.tokenizers.mistral import MistralTokenizer
class StreamingReasoningReconstructor:
    """Accumulates streamed DeltaMessage chunks into full strings.

    ``reasoning`` collects the reasoning deltas and ``other_content``
    collects the regular content deltas; both start as None until the
    first matching delta arrives.
    """

    def __init__(self):
        self.reasoning = None
        self.other_content = None

    def append_delta(self, delta: DeltaMessage):
        # A single delta may carry content or reasoning, never both.
        assert delta.content is None or delta.reasoning is None, (
            "Both content and reasoning content are present in the delta message"
        )
        # The legacy field must mirror the new one.
        assert delta.reasoning == delta.reasoning_content, (
            "reasoning_content should be present for backwards compatibility"
        )
        if delta.content is not None:
            self.other_content = (
                delta.content
                if self.other_content is None
                else self.other_content + delta.content
            )
        else:
            self.reasoning = (
                delta.reasoning
                if self.reasoning is None
                else self.reasoning + delta.reasoning
            )
def run_reasoning_extraction(
    reasoning_parser: ReasoningParser,
    model_output: list[str],
    request: ChatCompletionRequest | None = None,
    streaming: bool = False,
) -> tuple[str | None, str | None]:
    """Extract (reasoning, content) via the streaming or one-shot path."""
    if not streaming:
        return run_reasoning_extraction_nonstreaming(
            reasoning_parser, model_output, request
        )
    rec = run_reasoning_extraction_streaming(
        reasoning_parser,
        model_output,
        request,
    )
    # Normalize empty accumulated content to None for easy comparison.
    return rec.reasoning, rec.other_content or None
def run_reasoning_extraction_mistral(
    reasoning_parser: ReasoningParser,
    model_output: list[int],
    request: ChatCompletionRequest | None = None,
    streaming: bool = False,
) -> tuple[str | None, str | None]:
    """Extract (reasoning, content) from Mistral token ids.

    Streaming feeds the ids one at a time; otherwise the ids are decoded
    to token strings and parsed in a single non-streaming call.
    """
    tokenizer = reasoning_parser.model_tokenizer
    assert isinstance(tokenizer, MistralTokenizer), type(tokenizer)
    if streaming:
        rec = run_reasoning_extraction_streaming_mistral(
            reasoning_parser,
            model_output,
            request,
        )
        # Normalize empty accumulated content to None for easy comparison.
        return rec.reasoning, rec.other_content or None
    token_strings = tokenizer.convert_ids_to_tokens(model_output)
    return run_reasoning_extraction_nonstreaming(
        reasoning_parser, token_strings, request
    )
def run_reasoning_extraction_nonstreaming(
    reasoning_parser: ReasoningParser,
    model_output: list[str],
    request: ChatCompletionRequest | None = None,
) -> tuple[str | None, str | None]:
    """Join the deltas and parse them in a single non-streaming call."""
    request = request or ChatCompletionRequest(messages=[], model="test-model")
    joined = "".join(model_output)
    return reasoning_parser.extract_reasoning(model_output=joined, request=request)
def run_reasoning_extraction_streaming(
    reasoning_parser: ReasoningParser,
    model_deltas: list[str],
    request: ChatCompletionRequest | None = None,
) -> StreamingReasoningReconstructor:
    """Feed text deltas through the streaming parser, accumulating output."""
    request = request or ChatCompletionRequest(messages=[], model="test-model")
    rec = StreamingReasoningReconstructor()
    seen_text = ""
    seen_tokens: list[int] = []
    for chunk in model_deltas:
        # Map the chunk back to token ids, skipping out-of-vocab pieces.
        vocab = reasoning_parser.vocab
        chunk_tokens = [
            vocab.get(t)
            for t in reasoning_parser.model_tokenizer.tokenize(chunk)
            if t in vocab
        ]
        new_text = seen_text + chunk
        new_tokens = seen_tokens + chunk_tokens
        msg = reasoning_parser.extract_reasoning_streaming(
            seen_text,
            new_text,
            chunk,
            seen_tokens,
            new_tokens,
            chunk_tokens,
        )
        if msg is not None:
            rec.append_delta(msg)
        seen_text = new_text
        seen_tokens = new_tokens
    return rec
def run_reasoning_extraction_streaming_mistral(
    reasoning_parser: ReasoningParser,
    model_deltas: list[int],
    request: ChatCompletionRequest | None = None,
) -> StreamingReasoningReconstructor:
    """Feed Mistral token ids one at a time through the streaming parser."""
    tokenizer = reasoning_parser.model_tokenizer
    assert isinstance(tokenizer, MistralTokenizer), type(tokenizer)
    request = request or ChatCompletionRequest(messages=[], model="test-model")
    rec = StreamingReasoningReconstructor()
    seen_text = ""
    seen_tokens: list[int] = []
    for token_id in model_deltas:
        # Each delta is exactly one token id plus its decoded text piece.
        piece = tokenizer.convert_ids_to_tokens([token_id])[0]
        new_text = seen_text + piece
        new_tokens = seen_tokens + [token_id]
        msg = reasoning_parser.extract_reasoning_streaming(
            seen_text,
            new_text,
            piece,
            seen_tokens,
            new_tokens,
            [token_id],
        )
        if msg is not None:
            rec.append_delta(msg)
        seen_text = new_text
        seen_tokens = new_tokens
    return rec