[v0.18.0][Bugfix][Platform] Fix MiniMax M2 reasoning token usage accounting (#7700)
### What this PR does / why we need it?
This backports the MiniMax M2 reasoning-token usage accounting fix onto `releases/v0.18.0` for vllm-ascend. The release branch does not include the other local GLM patch commit, so this PR keeps the MiniMax change self-contained by:
- registering `patch_minimax_usage_accounting` on the release branch
- backporting `completion_tokens_details.reasoning_tokens` into chat usage generation
- fixing MiniMax reasoning token counting for `</think>`-delimited outputs without depending on the GLM suffix patch

### Does this PR introduce _any_ user-facing change?
Yes. OpenAI-compatible chat usage accounting for MiniMax M2 responses now reports corrected reasoning token counts on the release branch.

### How was this patch tested?
- `python -m compileall vllm_ascend/patch/platform/patch_minimax_usage_accounting.py`
- `python - <<'PY'` import check for `vllm_ascend.patch.platform.patch_minimax_usage_accounting` on top of `releases/v0.18.0`

No targeted automated regression test exists for this release-branch backport yet, so I validated syntax and module import compatibility on the release branch.

---------

Signed-off-by: QwertyJack <7554089+QwertyJack@users.noreply.github.com>
Co-authored-by: QwertyJack <7554089+QwertyJack@users.noreply.github.com>
This commit is contained in:
@@ -0,0 +1,67 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
|
||||
import pytest
|
||||
|
||||
from vllm.reasoning.minimax_m2_reasoning_parser import (
|
||||
MiniMaxM2AppendThinkReasoningParser,
|
||||
MiniMaxM2ReasoningParser,
|
||||
)
|
||||
|
||||
|
||||
class FakeTokenizer:
    """Minimal tokenizer stub for the MiniMax M2 reasoning parsers.

    It exposes only ``get_vocab`` with the two think-delimiter tokens,
    which is all the parsers under test need to resolve token ids.
    """

    def get_vocab(self):
        """Return a vocab mapping the think delimiters to fixed ids."""
        # Id 2 for "</think>" is the end-of-reasoning marker that the
        # parametrized token_id sequences below rely on.
        vocab = {}
        vocab["<think>"] = 1
        vocab["</think>"] = 2
        return vocab
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ("parser_cls", "token_ids", "expected_reasoning_tokens"),
    [
        # Each scenario is exercised for both parser variants; the test id
        # is "<parser-tag>-<scenario>", matching the original case names.
        pytest.param(parser_cls, token_ids, expected, id=f"{tag}-{scenario}")
        for token_ids, expected, scenario in (
            ([10, 11, 2, 20], 2, "reasoning-before-end-token"),
            ([10, 11, 20], 3, "all-tokens-are-reasoning-before-end-token"),
            ([2, 20], 0, "end-token-first-means-no-reasoning-tokens"),
        )
        for parser_cls, tag in (
            (MiniMaxM2ReasoningParser, "minimax"),
            (MiniMaxM2AppendThinkReasoningParser, "append-think"),
        )
    ],
)
def test_count_reasoning_tokens(
    parser_cls,
    token_ids,
    expected_reasoning_tokens,
):
    """Check reasoning-token counting for </think>-delimited outputs.

    Tokens before the "</think>" id (2 in FakeTokenizer's vocab) count as
    reasoning tokens; with no end token the whole sequence is reasoning,
    and a leading end token yields zero.
    """
    parser = parser_cls(FakeTokenizer())

    assert parser.count_reasoning_tokens(token_ids) == expected_reasoning_tokens
|
||||
Reference in New Issue
Block a user