[BugFix][Platform] Fix extra function name in final chunk of streaming tool calls (#8178)
### What this PR does / why we need it? Fix a bug in the GLM tool call parser where the `function.name` field was incorrectly included in the final (non-first) chunks of streaming tool calls. Per OpenAI streaming semantics, `id`, `type`, and `function.name` must only appear in the **first** chunk for a given tool call index. When `_create_remaining_args_delta` was called for continuing/finishing chunks, it was incorrectly reading the function name from `delta_message.tool_calls` and re-emitting it, causing clients to see a duplicate/extra function name in the final chunk. **Root cause**: The original code always looked up the tool call in `delta_message.tool_calls` to get the name, id, and type — even when this was not the first chunk being streamed. This caused the function name to appear again in the final argument-completion chunk. **Fix**: - Track, for each tool call index, whether anything has already been streamed (`already_streamed`, i.e. whether the index is present in `streamed_tool_args`), separately from the argument text streamed so far (`already_streamed_args`). - Only populate `fallback_tool_call_id`, `fallback_tool_call_type`, and `fallback_tool_call_name` when `already_streamed` is false (i.e., this is genuinely the first chunk). Key existence is used rather than the truthiness of `already_streamed_args` so that a first chunk with an empty arguments string does not cause the header to be re-emitted in a later chunk. - Refactored `_create_remaining_args_delta` to omit header fields entirely when all fallback values are `None`, which is the correct behavior for continuing/finishing chunks. ### Does this PR introduce _any_ user-facing change? Yes. Clients consuming the streaming tool call response will no longer receive a duplicate `function.name` in the final chunk. This fixes incorrect behavior visible in the OpenAI-compatible streaming API output for GLM models using tool calls. ### How was this patch tested? - Code review and logic analysis of the streaming tool call path in `patch_glm_tool_call_parser.py`. - Existing unit tests in `tests/ut/platform/test_patch_glm_tool_call_parser.py`.
--------- Signed-off-by: chen-weipeng12 <chen-weipeng12@noreply.gitcode.com> Signed-off-by: chenweiqiang11 <chenweiqiang11@noreply.github.com> Co-authored-by: chen-weipeng12 <chen-weipeng12@noreply.gitcode.com>
This commit is contained in:
@@ -70,7 +70,11 @@ def test_create_remaining_args_delta_uses_fallback_metadata_for_args_only_delta(
|
||||
assert tc.function.arguments == ('{"files":[{"filepath":"HumanEval-X/README.md"}]}')
|
||||
|
||||
|
||||
def test_create_remaining_args_delta_prefers_current_metadata_over_fallback():
|
||||
def test_create_remaining_args_delta_uses_fallback_over_original_delta():
|
||||
# _create_remaining_args_delta ignores original_delta metadata and uses
|
||||
# the explicit fallback_* parameters instead. The caller is responsible
|
||||
# for passing non-None fallback values only for the first chunk of a
|
||||
# tool call (when the header has not yet been streamed).
|
||||
original_delta = DeltaMessage(
|
||||
tool_calls=[
|
||||
DeltaToolCall(
|
||||
@@ -95,9 +99,9 @@ def test_create_remaining_args_delta_prefers_current_metadata_over_fallback():
|
||||
)
|
||||
|
||||
tc = result.tool_calls[0]
|
||||
assert tc.id == "call_current"
|
||||
assert tc.id == "call_fallback"
|
||||
assert tc.type == "function"
|
||||
assert tc.function.name == "current_name"
|
||||
assert tc.function.name == "fallback_name"
|
||||
assert tc.function.arguments == "]}"
|
||||
|
||||
|
||||
|
||||
@@ -68,26 +68,38 @@ def _create_remaining_args_delta(
|
||||
fallback_tool_call_type: str | None = None,
|
||||
fallback_tool_call_name: str | None = None,
|
||||
) -> DeltaMessage:
|
||||
original_tc = next(
|
||||
(tc for tc in delta_message.tool_calls if tc.index == index),
|
||||
None,
|
||||
"""
|
||||
Create a delta message for remaining tool arguments.
|
||||
|
||||
Per OpenAI streaming semantics, id/type/function.name must only appear
|
||||
in the *first* chunk for a given tool call index. Callers must pass
|
||||
non-None fallback_* values only when this is genuinely the first chunk
|
||||
(i.e. nothing has been streamed yet for this tool call). When all
|
||||
fallback_* are None the header fields are omitted entirely, which is the
|
||||
correct behaviour for continuing/finishing chunks.
|
||||
"""
|
||||
include_header = any(
|
||||
v is not None for v in (fallback_tool_call_id, fallback_tool_call_type, fallback_tool_call_name)
|
||||
)
|
||||
original_fn = original_tc.function if original_tc else None
|
||||
|
||||
original_fn_name = None
|
||||
if isinstance(original_fn, DeltaFunctionCall):
|
||||
original_fn_name = original_fn.name
|
||||
elif isinstance(original_fn, dict):
|
||||
original_fn_name = original_fn.get("name")
|
||||
|
||||
if not include_header:
|
||||
return DeltaMessage(
|
||||
tool_calls=[
|
||||
DeltaToolCall(
|
||||
index=index,
|
||||
function=DeltaFunctionCall(
|
||||
arguments=remaining_call,
|
||||
),
|
||||
)
|
||||
]
|
||||
)
|
||||
return DeltaMessage(
|
||||
tool_calls=[
|
||||
DeltaToolCall(
|
||||
index=index,
|
||||
id=(original_tc.id if original_tc and original_tc.id is not None else fallback_tool_call_id),
|
||||
type=(original_tc.type if original_tc and original_tc.type is not None else fallback_tool_call_type),
|
||||
id=fallback_tool_call_id,
|
||||
type=fallback_tool_call_type,
|
||||
function=DeltaFunctionCall(
|
||||
name=(original_fn_name if original_fn_name is not None else fallback_tool_call_name),
|
||||
name=fallback_tool_call_name,
|
||||
arguments=remaining_call,
|
||||
),
|
||||
)
|
||||
@@ -643,34 +655,45 @@ async def _patched_chat_completion_stream_generator(
|
||||
index = 0
|
||||
|
||||
if self._should_check_for_unstreamed_tool_arg_tokens(delta_message, output) and tool_parser:
|
||||
already_streamed = index in streamed_tool_args[i]
|
||||
already_streamed_args = streamed_tool_args[i].get(index, "")
|
||||
remaining_call = self._compute_remaining_tool_args(
|
||||
expected_args=tool_parser.prev_tool_call_arr[index].get("arguments", {}),
|
||||
streamed_args=streamed_tool_args[i].get(index, ""),
|
||||
streamed_args=already_streamed_args,
|
||||
)
|
||||
|
||||
fallback_tool_call = (
|
||||
tool_parser.prev_tool_call_arr[index] if index < len(tool_parser.prev_tool_call_arr) else {}
|
||||
)
|
||||
# Per OpenAI streaming semantics, id/type/name must only
|
||||
# appear in the *first* chunk for a tool call index.
|
||||
# Use `already_streamed` (key existence) rather than
|
||||
# `already_streamed_args` (string truthiness) so that a
|
||||
# first chunk with an empty arguments string does not
|
||||
# cause the header to be re-emitted in a later chunk.
|
||||
fallback_tool_call_id = None
|
||||
fallback_tool_call_type = None
|
||||
fallback_tool_call_name = None
|
||||
if isinstance(fallback_tool_call, dict):
|
||||
fallback_tool_call_id = fallback_tool_call.get("id")
|
||||
fallback_tool_call_type = fallback_tool_call.get("type")
|
||||
fallback_tool_call_name = fallback_tool_call.get("name")
|
||||
if not already_streamed:
|
||||
fallback_tool_call = (
|
||||
tool_parser.prev_tool_call_arr[index]
|
||||
if index < len(tool_parser.prev_tool_call_arr)
|
||||
else {}
|
||||
)
|
||||
if isinstance(fallback_tool_call, dict):
|
||||
fallback_tool_call_id = fallback_tool_call.get("id")
|
||||
fallback_tool_call_type = fallback_tool_call.get("type")
|
||||
fallback_tool_call_name = fallback_tool_call.get("name")
|
||||
|
||||
tool_call_ids = getattr(tool_parser, "_tool_call_ids", None)
|
||||
if (
|
||||
fallback_tool_call_id is None
|
||||
and isinstance(tool_call_ids, list)
|
||||
and index < len(tool_call_ids)
|
||||
):
|
||||
fallback_tool_call_id = tool_call_ids[index]
|
||||
tool_call_ids = getattr(tool_parser, "_tool_call_ids", None)
|
||||
if (
|
||||
fallback_tool_call_id is None
|
||||
and isinstance(tool_call_ids, list)
|
||||
and index < len(tool_call_ids)
|
||||
):
|
||||
fallback_tool_call_id = tool_call_ids[index]
|
||||
|
||||
if fallback_tool_call_type is None and (
|
||||
fallback_tool_call_id is not None or fallback_tool_call_name is not None
|
||||
):
|
||||
fallback_tool_call_type = "function"
|
||||
if fallback_tool_call_type is None and (
|
||||
fallback_tool_call_id is not None or fallback_tool_call_name is not None
|
||||
):
|
||||
fallback_tool_call_type = "function"
|
||||
|
||||
delta_message = self._create_remaining_args_delta(
|
||||
delta_message,
|
||||
|
||||
Reference in New Issue
Block a user