[BugFix][Platform] Fix extra function name in final chunk of streaming tool calls (#8178)

### What this PR does / why we need it?

Fix a bug in the GLM tool call parser where the `function.name` field
was incorrectly included in the final (non-first) chunks of streaming
tool calls.

Per OpenAI streaming semantics, `id`, `type`, and `function.name` must
only appear in the **first** chunk for a given tool call index. When
`_create_remaining_args_delta` was called for continuing/finishing
chunks, it was incorrectly reading the function name from
`delta_message.tool_calls` and re-emitting it, causing clients to see a
duplicate/extra function name in the final chunk.

**Root cause**: The original code always looked up the tool call in
`delta_message.tool_calls` to get the name, id, and type — even when
this was not the first chunk being streamed. This caused the function
name to appear again in the final argument-completion chunk.

**Fix**:
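- Track, for each tool call index, whether anything has already been
streamed (`already_streamed`, i.e. the index is present in
`streamed_tool_args[i]`).
- Only populate `fallback_tool_call_id`, `fallback_tool_call_type`, and
`fallback_tool_call_name` when `already_streamed` is false (i.e.,
this is genuinely the first chunk). Key existence is used rather than the
truthiness of the streamed argument string (`already_streamed_args`), so a
first chunk with an empty arguments string does not cause the header to be
re-emitted in a later chunk.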
- Refactored `_create_remaining_args_delta` to omit header fields
entirely when all fallback values are `None`, which is the correct
behavior for continuing/finishing chunks.

### Does this PR introduce _any_ user-facing change?

Yes. Clients consuming the streaming tool call response will no longer
receive a duplicate `function.name` in the final chunk. This fixes
incorrect behavior visible in the OpenAI-compatible streaming API output
for GLM models using tool calls.

### How was this patch tested?

- Code review and logic analysis of the streaming tool call path in
`patch_glm_tool_call_parser.py`.
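- Unit tests in
`tests/ut/platform/test_patch_glm_tool_call_parser.py`, updated so that
`_create_remaining_args_delta` is expected to use the explicit `fallback_*`
metadata rather than the metadata carried by the original delta.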

---------

Signed-off-by: chen-weipeng12 <chen-weipeng12@noreply.gitcode.com>
Signed-off-by: chenweiqiang11 <chenweiqiang11@noreply.github.com>
Co-authored-by: chen-weipeng12 <chen-weipeng12@noreply.gitcode.com>
This commit is contained in:
chenweiqiang11
2026-04-15 17:50:10 +08:00
committed by GitHub
parent 808d00406f
commit 028b8cabc4
2 changed files with 63 additions and 36 deletions

View File

@@ -70,7 +70,11 @@ def test_create_remaining_args_delta_uses_fallback_metadata_for_args_only_delta(
assert tc.function.arguments == ('{"files":[{"filepath":"HumanEval-X/README.md"}]}')
def test_create_remaining_args_delta_prefers_current_metadata_over_fallback():
def test_create_remaining_args_delta_uses_fallback_over_original_delta():
# _create_remaining_args_delta ignores original_delta metadata and uses
# the explicit fallback_* parameters instead. The caller is responsible
# for passing non-None fallback values only for the first chunk of a
# tool call (when the header has not yet been streamed).
original_delta = DeltaMessage(
tool_calls=[
DeltaToolCall(
@@ -95,9 +99,9 @@ def test_create_remaining_args_delta_prefers_current_metadata_over_fallback():
)
tc = result.tool_calls[0]
assert tc.id == "call_current"
assert tc.id == "call_fallback"
assert tc.type == "function"
assert tc.function.name == "current_name"
assert tc.function.name == "fallback_name"
assert tc.function.arguments == "]}"

View File

@@ -68,26 +68,38 @@ def _create_remaining_args_delta(
fallback_tool_call_type: str | None = None,
fallback_tool_call_name: str | None = None,
) -> DeltaMessage:
original_tc = next(
(tc for tc in delta_message.tool_calls if tc.index == index),
None,
"""
Create a delta message for remaining tool arguments.
Per OpenAI streaming semantics, id/type/function.name must only appear
in the *first* chunk for a given tool call index. Callers must pass
non-None fallback_* values only when this is genuinely the first chunk
(i.e. nothing has been streamed yet for this tool call). When all
fallback_* are None the header fields are omitted entirely, which is the
correct behaviour for continuing/finishing chunks.
"""
include_header = any(
v is not None for v in (fallback_tool_call_id, fallback_tool_call_type, fallback_tool_call_name)
)
original_fn = original_tc.function if original_tc else None
original_fn_name = None
if isinstance(original_fn, DeltaFunctionCall):
original_fn_name = original_fn.name
elif isinstance(original_fn, dict):
original_fn_name = original_fn.get("name")
if not include_header:
return DeltaMessage(
tool_calls=[
DeltaToolCall(
index=index,
function=DeltaFunctionCall(
arguments=remaining_call,
),
)
]
)
return DeltaMessage(
tool_calls=[
DeltaToolCall(
index=index,
id=(original_tc.id if original_tc and original_tc.id is not None else fallback_tool_call_id),
type=(original_tc.type if original_tc and original_tc.type is not None else fallback_tool_call_type),
id=fallback_tool_call_id,
type=fallback_tool_call_type,
function=DeltaFunctionCall(
name=(original_fn_name if original_fn_name is not None else fallback_tool_call_name),
name=fallback_tool_call_name,
arguments=remaining_call,
),
)
@@ -643,34 +655,45 @@ async def _patched_chat_completion_stream_generator(
index = 0
if self._should_check_for_unstreamed_tool_arg_tokens(delta_message, output) and tool_parser:
already_streamed = index in streamed_tool_args[i]
already_streamed_args = streamed_tool_args[i].get(index, "")
remaining_call = self._compute_remaining_tool_args(
expected_args=tool_parser.prev_tool_call_arr[index].get("arguments", {}),
streamed_args=streamed_tool_args[i].get(index, ""),
streamed_args=already_streamed_args,
)
fallback_tool_call = (
tool_parser.prev_tool_call_arr[index] if index < len(tool_parser.prev_tool_call_arr) else {}
)
# Per OpenAI streaming semantics, id/type/name must only
# appear in the *first* chunk for a tool call index.
# Use `already_streamed` (key existence) rather than
# `already_streamed_args` (string truthiness) so that a
# first chunk with an empty arguments string does not
# cause the header to be re-emitted in a later chunk.
fallback_tool_call_id = None
fallback_tool_call_type = None
fallback_tool_call_name = None
if isinstance(fallback_tool_call, dict):
fallback_tool_call_id = fallback_tool_call.get("id")
fallback_tool_call_type = fallback_tool_call.get("type")
fallback_tool_call_name = fallback_tool_call.get("name")
if not already_streamed:
fallback_tool_call = (
tool_parser.prev_tool_call_arr[index]
if index < len(tool_parser.prev_tool_call_arr)
else {}
)
if isinstance(fallback_tool_call, dict):
fallback_tool_call_id = fallback_tool_call.get("id")
fallback_tool_call_type = fallback_tool_call.get("type")
fallback_tool_call_name = fallback_tool_call.get("name")
tool_call_ids = getattr(tool_parser, "_tool_call_ids", None)
if (
fallback_tool_call_id is None
and isinstance(tool_call_ids, list)
and index < len(tool_call_ids)
):
fallback_tool_call_id = tool_call_ids[index]
tool_call_ids = getattr(tool_parser, "_tool_call_ids", None)
if (
fallback_tool_call_id is None
and isinstance(tool_call_ids, list)
and index < len(tool_call_ids)
):
fallback_tool_call_id = tool_call_ids[index]
if fallback_tool_call_type is None and (
fallback_tool_call_id is not None or fallback_tool_call_name is not None
):
fallback_tool_call_type = "function"
if fallback_tool_call_type is None and (
fallback_tool_call_id is not None or fallback_tool_call_name is not None
):
fallback_tool_call_type = "function"
delta_message = self._create_remaining_args_delta(
delta_message,