From d81101acdd3683cdf5289f4f7dfff0610044921d Mon Sep 17 00:00:00 2001 From: jack Date: Thu, 23 Apr 2026 16:46:10 +0800 Subject: [PATCH] [releases/v0.18.0][Platform][BugFix] Guard forced tool choice with empty content (#8400) ### What this PR does / why we need it? This backports the forced-tool-choice `content=None` guard to the `releases/v0.18.0` compatibility layer. Upstream vLLM still has forced named tool-choice branches that assert `content is not None` after reasoning extraction. Some reasoning parsers can legally consume the full output and return `(reasoning, None)`, which makes the assert reachable and can surface as a server-side failure. This PR follows the same compatibility-patch pattern used by: - `7314bbe2` fix(platform): reimplement MiniMax usage accounting patch (#7835) - `f83cb0e6` [Bugfix][Platform] Fix GLM47 tool-call finish backfill (#7710) The patch is intentionally narrow: - normalize `content=None` to `""` only for forced named tool choice - patch both chat-completions and responses parser entry points - keep the rest of upstream behavior unchanged Upstream tracking: - issue: vllm-project/vllm#40147 - PR: vllm-project/vllm#40148 ### Does this PR introduce _any_ user-facing change? Yes. Forced named tool choice becomes robust when the reasoning parser returns no post-reasoning content, avoiding an internal assertion failure and emitting an empty-argument function call instead. ### How was this patch tested? Unit tests: ```bash pytest -sv tests/ut/patch/platform/test_patch_tool_choice_none_content.py \ tests/ut/patch/platform/test_patch_glm_tool_call_parser.py \ tests/ut/patch/platform/test_patch_minimax_usage_accounting.py ``` Result: 22 passed. --------- Signed-off-by: QwertyJack <7554089+QwertyJack@users.noreply.github.com> Co-authored-by: QwertyJack <7554089+QwertyJack@users.noreply.github.com> --- .../test_patch_tool_choice_none_content.py | 95 +++++++++++++++++++ vllm_ascend/patch/__init__.py | 20 ++++ vllm_ascend/patch/platform/__init__.py | 1 + .../patch_tool_choice_none_content.py | 86 +++++++++++++++++ 4 files changed, 202 insertions(+) create mode 100644 tests/ut/patch/platform/test_patch_tool_choice_none_content.py create mode 100644 vllm_ascend/patch/platform/patch_tool_choice_none_content.py diff --git a/tests/ut/patch/platform/test_patch_tool_choice_none_content.py b/tests/ut/patch/platform/test_patch_tool_choice_none_content.py new file mode 100644 index 00000000..5e6e0424 --- /dev/null +++ b/tests/ut/patch/platform/test_patch_tool_choice_none_content.py @@ -0,0 +1,95 @@ +# SPDX-License-Identifier: Apache-2.0 + +from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest +from vllm.entrypoints.openai.engine.serving import OpenAIServing +from vllm.entrypoints.openai.responses.protocol import ResponsesRequest +from vllm.parser.abstract_parser import DelegatingParser + +from vllm_ascend.patch.platform import patch_tool_choice_none_content # noqa: F401 + + +class _DummyDelegatingParser(DelegatingParser): + def is_reasoning_end(self, input_ids: list[int]) -> bool: + return False + + def extract_content_ids(self, input_ids: list[int]) -> list[int]: + return input_ids + + def extract_reasoning(self, model_output: str, request): + return None, model_output + + def extract_reasoning_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: list[int], + current_token_ids: list[int], + delta_token_ids: list[int], + ): + return None + + def extract_tool_calls(self, model_output: str, request): + 
return None + + +def test_parse_tool_calls_from_content_allows_named_tool_choice_with_none_content(): + request = ChatCompletionRequest.model_validate( + { + "model": "test-model", + "messages": [{"role": "user", "content": "test"}], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "parameters": {"type": "object", "properties": {}}, + }, + } + ], + "tool_choice": {"type": "function", "function": {"name": "get_weather"}}, + } + ) + + tool_calls, content = OpenAIServing._parse_tool_calls_from_content( + request=request, + tokenizer=None, + enable_auto_tools=True, + tool_parser_cls=None, + content=None, + ) + + assert content is None + assert tool_calls is not None + assert len(tool_calls) == 1 + assert tool_calls[0].name == "get_weather" + assert tool_calls[0].arguments == "" + + +def test_responses_parser_allows_named_tool_choice_with_none_content(): + request = ResponsesRequest.model_validate( + { + "model": "test-model", + "input": "test", + "tools": [ + { + "type": "function", + "name": "get_weather", + "parameters": {"type": "object", "properties": {}}, + } + ], + "tool_choice": {"type": "function", "name": "get_weather"}, + } + ) + parser = _DummyDelegatingParser(tokenizer=None) + + tool_calls, content = parser._parse_tool_calls( + request=request, + content=None, + enable_auto_tools=False, + ) + + assert content is None + assert len(tool_calls) == 1 + assert tool_calls[0].name == "get_weather" + assert tool_calls[0].arguments == "" diff --git a/vllm_ascend/patch/__init__.py b/vllm_ascend/patch/__init__.py index 4d8c086d..4c719e40 100644 --- a/vllm_ascend/patch/__init__.py +++ b/vllm_ascend/patch/__init__.py @@ -238,6 +238,26 @@ # finish-backfill fix are present in the runtime vLLM version used by # vllm-ascend. # +# ** 11. File: platform/patch_tool_choice_none_content.py** +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# 1. `vllm.entrypoints.openai.engine.serving.OpenAIServing` +# `vllm.parser.abstract_parser.DelegatingParser` +# Why: +# Some reasoning parsers can consume the full model output and return +# `content=None`. On the release runtime, forced named tool choice still +# asserts that content is present before constructing a function call, +# which can surface as a server-side failure instead of an empty-argument +# tool call. +# How: +# Monkey-patch the forced-tool-choice parsing entry points to normalize +# `content=None` to `""` before delegating back to the original upstream +# implementations. +# Related PR (if no, explain why): +# https://github.com/vllm-project/vllm/pull/40148 +# Future Plan: +# Remove this patch once the upstream forced-tool-choice fix is included +# in the runtime vLLM version used by vllm-ascend. 
+# # * Worker Patch: # =============== # diff --git a/vllm_ascend/patch/platform/__init__.py b/vllm_ascend/patch/platform/__init__.py index 306a63e3..bbcd1df2 100644 --- a/vllm_ascend/patch/platform/__init__.py +++ b/vllm_ascend/patch/platform/__init__.py @@ -30,6 +30,7 @@ import vllm_ascend.patch.platform.patch_sched_yield # noqa import vllm_ascend.patch.platform.patch_torch_accelerator # noqa import vllm_ascend.patch.platform.patch_minimax_usage_accounting # noqa import vllm_ascend.patch.platform.patch_glm_tool_call_parser # noqa +import vllm_ascend.patch.platform.patch_tool_choice_none_content # noqa if envs.VLLM_ASCEND_BALANCE_SCHEDULING: import vllm_ascend.patch.platform.patch_balance_schedule # noqa diff --git a/vllm_ascend/patch/platform/patch_tool_choice_none_content.py b/vllm_ascend/patch/platform/patch_tool_choice_none_content.py new file mode 100644 index 00000000..a64ddf42 --- /dev/null +++ b/vllm_ascend/patch/platform/patch_tool_choice_none_content.py @@ -0,0 +1,86 @@ +# +# Copyright (c) 2026 Huawei Technologies Co., Ltd. All Rights Reserved. +# This file is a part of the vllm-ascend project. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# OpenAI forced tool choice: tolerate None content after reasoning extraction. +# + +from __future__ import annotations + +from openai.types.responses import ToolChoiceFunction +from vllm.entrypoints.openai.chat_completion.protocol import ( + ChatCompletionNamedToolChoiceParam, +) +from vllm.entrypoints.openai.engine.serving import OpenAIServing +from vllm.parser.abstract_parser import DelegatingParser + + +def _normalize_tool_choice_content( + request, + content: str | None, +) -> str | None: + if content is not None: + return content + + tool_choice = getattr(request, "tool_choice", None) + if isinstance( + tool_choice, + (ToolChoiceFunction, ChatCompletionNamedToolChoiceParam), + ): + return "" + return content + + +_original_parse_tool_calls_from_content = OpenAIServing._parse_tool_calls_from_content + + +def _patched_parse_tool_calls_from_content( + request, + tokenizer, + enable_auto_tools: bool, + tool_parser_cls, + content: str | None = None, +): + content = _normalize_tool_choice_content(request, content) + return _original_parse_tool_calls_from_content( + request=request, + tokenizer=tokenizer, + enable_auto_tools=enable_auto_tools, + tool_parser_cls=tool_parser_cls, + content=content, + ) + + +OpenAIServing._parse_tool_calls_from_content = staticmethod(_patched_parse_tool_calls_from_content) + +_original_delegating_parse_tool_calls = DelegatingParser._parse_tool_calls + + +def _patched_delegating_parse_tool_calls( + self, + request, + content: str | None, + enable_auto_tools: bool, +): + content = _normalize_tool_choice_content(request, content) + return _original_delegating_parse_tool_calls( + self, + request, + content, + enable_auto_tools, + ) + + +DelegatingParser._parse_tool_calls = _patched_delegating_parse_tool_calls
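
For reference, a minimal client-side sketch of the behavior described under "Does this PR introduce _any_ user-facing change?". It is not part of the patch or its tests; the server URL, API key, and model name are placeholders, and the `get_weather` tool mirrors the one used in the unit tests. Under these assumptions, a forced named tool choice whose post-reasoning content is empty should come back as an empty-argument tool call instead of a server-side assertion failure.

```python
# Minimal sketch (assumptions: base_url, api_key, and model are placeholders;
# any reasoning-capable model served by vllm-ascend would exercise this path).
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

completion = client.chat.completions.create(
    model="my-reasoning-model",  # placeholder model name
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
    tools=[
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "parameters": {"type": "object", "properties": {}},
            },
        }
    ],
    # Forced named tool choice: the code path guarded by this patch.
    tool_choice={"type": "function", "function": {"name": "get_weather"}},
)

tool_call = completion.choices[0].message.tool_calls[0]
# With the guard in place, a fully-consumed reasoning output yields an
# empty-argument function call rather than an internal assertion failure.
print(tool_call.function.name, repr(tool_call.function.arguments))
```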