From 028b8cabc4bab8dc9b4e399627bc4ba5882260ea Mon Sep 17 00:00:00 2001 From: chenweiqiang11 <1436404370@qq.com> Date: Wed, 15 Apr 2026 17:50:10 +0800 Subject: [PATCH] [BugFix][Platform] Fix extra function name in final chunk of streaming tool calls (#8178) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### What this PR does / why we need it? Fix a bug in the GLM tool call parser where the `function.name` field was incorrectly included in the final (non-first) chunks of streaming tool calls. Per OpenAI streaming semantics, `id`, `type`, and `function.name` must only appear in the **first** chunk for a given tool call index. When `_create_remaining_args_delta` was called for continuing/finishing chunks, it was incorrectly reading the function name from `delta_message.tool_calls` and re-emitting it, causing clients to see a duplicate/extra function name in the final chunk. **Root cause**: The original code always looked up the tool call in `delta_message.tool_calls` to get the name, id, and type — even when this was not the first chunk being streamed. This caused the function name to appear again in the final argument-completion chunk. **Fix**: - Track whether anything has already been streamed for each tool call index (`already_streamed`, i.e. whether the index is present in `streamed_tool_args[i]`), independently of whether the streamed argument string (`already_streamed_args`) is empty. - Only populate `fallback_tool_call_id`, `fallback_tool_call_type`, and `fallback_tool_call_name` when `already_streamed` is false (i.e., this is genuinely the first chunk). - Refactor `_create_remaining_args_delta` to omit header fields entirely when all fallback values are `None`, which is the correct behavior for continuing/finishing chunks. ### Does this PR introduce _any_ user-facing change? Yes. Clients consuming the streaming tool call response will no longer receive a duplicate `function.name` in the final chunk. This fixes incorrect behavior visible in the OpenAI-compatible streaming API output for GLM models using tool calls. ### How was this patch tested? 
- Code review and logic analysis of the streaming tool call path in `patch_glm_tool_call_parser.py`. - Existing unit tests in `tests/ut/patch/platform/test_patch_glm_tool_call_parser.py`. --------- Signed-off-by: chen-weipeng12 Signed-off-by: chenweiqiang11 Co-authored-by: chen-weipeng12 --- .../test_patch_glm_tool_call_parser.py | 10 ++- .../platform/patch_glm_tool_call_parser.py | 89 ++++++++++++------- 2 files changed, 63 insertions(+), 36 deletions(-) diff --git a/tests/ut/patch/platform/test_patch_glm_tool_call_parser.py b/tests/ut/patch/platform/test_patch_glm_tool_call_parser.py index c04cc05c..65303019 100644 --- a/tests/ut/patch/platform/test_patch_glm_tool_call_parser.py +++ b/tests/ut/patch/platform/test_patch_glm_tool_call_parser.py @@ -70,7 +70,11 @@ def test_create_remaining_args_delta_uses_fallback_metadata_for_args_only_delta( assert tc.function.arguments == ('{"files":[{"filepath":"HumanEval-X/README.md"}]}') -def test_create_remaining_args_delta_prefers_current_metadata_over_fallback(): +def test_create_remaining_args_delta_uses_fallback_over_original_delta(): + # _create_remaining_args_delta ignores original_delta metadata and uses + # the explicit fallback_* parameters instead. The caller is responsible + # for passing non-None fallback values only for the first chunk of a + # tool call (when the header has not yet been streamed). 
original_delta = DeltaMessage( tool_calls=[ DeltaToolCall( @@ -95,9 +99,9 @@ def test_create_remaining_args_delta_prefers_current_metadata_over_fallback(): ) tc = result.tool_calls[0] - assert tc.id == "call_current" + assert tc.id == "call_fallback" assert tc.type == "function" - assert tc.function.name == "current_name" + assert tc.function.name == "fallback_name" assert tc.function.arguments == "]}" diff --git a/vllm_ascend/patch/platform/patch_glm_tool_call_parser.py b/vllm_ascend/patch/platform/patch_glm_tool_call_parser.py index 3860a8a2..f8e98e9c 100644 --- a/vllm_ascend/patch/platform/patch_glm_tool_call_parser.py +++ b/vllm_ascend/patch/platform/patch_glm_tool_call_parser.py @@ -68,26 +68,38 @@ def _create_remaining_args_delta( fallback_tool_call_type: str | None = None, fallback_tool_call_name: str | None = None, ) -> DeltaMessage: - original_tc = next( - (tc for tc in delta_message.tool_calls if tc.index == index), - None, + """ + Create a delta message for remaining tool arguments. + + Per OpenAI streaming semantics, id/type/function.name must only appear + in the *first* chunk for a given tool call index. Callers must pass + non-None fallback_* values only when this is genuinely the first chunk + (i.e. nothing has been streamed yet for this tool call). When all + fallback_* are None the header fields are omitted entirely, which is the + correct behaviour for continuing/finishing chunks. 
+ """ + include_header = any( + v is not None for v in (fallback_tool_call_id, fallback_tool_call_type, fallback_tool_call_name) ) - original_fn = original_tc.function if original_tc else None - - original_fn_name = None - if isinstance(original_fn, DeltaFunctionCall): - original_fn_name = original_fn.name - elif isinstance(original_fn, dict): - original_fn_name = original_fn.get("name") - + if not include_header: + return DeltaMessage( + tool_calls=[ + DeltaToolCall( + index=index, + function=DeltaFunctionCall( + arguments=remaining_call, + ), + ) + ] + ) return DeltaMessage( tool_calls=[ DeltaToolCall( index=index, - id=(original_tc.id if original_tc and original_tc.id is not None else fallback_tool_call_id), - type=(original_tc.type if original_tc and original_tc.type is not None else fallback_tool_call_type), + id=fallback_tool_call_id, + type=fallback_tool_call_type, function=DeltaFunctionCall( - name=(original_fn_name if original_fn_name is not None else fallback_tool_call_name), + name=fallback_tool_call_name, arguments=remaining_call, ), ) @@ -643,34 +655,45 @@ async def _patched_chat_completion_stream_generator( index = 0 if self._should_check_for_unstreamed_tool_arg_tokens(delta_message, output) and tool_parser: + already_streamed = index in streamed_tool_args[i] + already_streamed_args = streamed_tool_args[i].get(index, "") remaining_call = self._compute_remaining_tool_args( expected_args=tool_parser.prev_tool_call_arr[index].get("arguments", {}), - streamed_args=streamed_tool_args[i].get(index, ""), + streamed_args=already_streamed_args, ) - fallback_tool_call = ( - tool_parser.prev_tool_call_arr[index] if index < len(tool_parser.prev_tool_call_arr) else {} - ) + # Per OpenAI streaming semantics, id/type/name must only + # appear in the *first* chunk for a tool call index. 
+ # Use `already_streamed` (key existence) rather than + # `already_streamed_args` (string truthiness) so that a + # first chunk with an empty arguments string does not + # cause the header to be re-emitted in a later chunk. fallback_tool_call_id = None fallback_tool_call_type = None fallback_tool_call_name = None - if isinstance(fallback_tool_call, dict): - fallback_tool_call_id = fallback_tool_call.get("id") - fallback_tool_call_type = fallback_tool_call.get("type") - fallback_tool_call_name = fallback_tool_call.get("name") + if not already_streamed: + fallback_tool_call = ( + tool_parser.prev_tool_call_arr[index] + if index < len(tool_parser.prev_tool_call_arr) + else {} + ) + if isinstance(fallback_tool_call, dict): + fallback_tool_call_id = fallback_tool_call.get("id") + fallback_tool_call_type = fallback_tool_call.get("type") + fallback_tool_call_name = fallback_tool_call.get("name") - tool_call_ids = getattr(tool_parser, "_tool_call_ids", None) - if ( - fallback_tool_call_id is None - and isinstance(tool_call_ids, list) - and index < len(tool_call_ids) - ): - fallback_tool_call_id = tool_call_ids[index] + tool_call_ids = getattr(tool_parser, "_tool_call_ids", None) + if ( + fallback_tool_call_id is None + and isinstance(tool_call_ids, list) + and index < len(tool_call_ids) + ): + fallback_tool_call_id = tool_call_ids[index] - if fallback_tool_call_type is None and ( - fallback_tool_call_id is not None or fallback_tool_call_name is not None - ): - fallback_tool_call_type = "function" + if fallback_tool_call_type is None and ( + fallback_tool_call_id is not None or fallback_tool_call_name is not None + ): + fallback_tool_call_type = "function" delta_message = self._create_remaining_args_delta( delta_message,