From 72ffc00b862c7c482dd7fd64b87f9ef7a9ff92f7 Mon Sep 17 00:00:00 2001 From: wjunLu <135617475+wjunLu@users.noreply.github.com> Date: Fri, 23 Jan 2026 09:52:55 +0800 Subject: [PATCH] [Bugfix] Fix structured outputs errors: `TypeError: apply_token_bitmask_inplace_cpu()` (#6151) ### What this PR does / why we need it? Fix https://github.com/vllm-project/vllm-ascend/issues/5524 - vLLM version: v0.13.0 - vLLM main: https://github.com/vllm-project/vllm/commit/d68209402ddab3f54a09bc1f4de9a9495a283b60 Signed-off-by: wjunLu --- .github/workflows/_e2e_test.yaml | 3 +-- pyproject.toml | 1 + requirements.txt | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_e2e_test.yaml b/.github/workflows/_e2e_test.yaml index a24b1cf4..efd24dfa 100644 --- a/.github/workflows/_e2e_test.yaml +++ b/.github/workflows/_e2e_test.yaml @@ -99,8 +99,7 @@ jobs: pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py pytest -sv --durations=0 tests/e2e/singlecard/test_completion_with_prompt_embeds.py pytest -sv --durations=0 tests/e2e/singlecard/test_cpu_offloading.py - # xgrammar has parameter mismatching bug, please follows: https://github.com/vllm-project/vllm-ascend/issues/5524 - # pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py + pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py pytest -sv --durations=0 tests/e2e/singlecard/test_ilama_lora.py pytest -sv --durations=0 tests/e2e/singlecard/test_llama32_lora.py pytest -sv --durations=0 tests/e2e/singlecard/test_qwen3_multi_loras.py diff --git a/pyproject.toml b/pyproject.toml index 175a59c2..5e7e2de4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,6 +25,7 @@ requires = [ "msgpack", "quart", "numba", + "xgrammar>=0.1.30", "fastapi<0.124.0", "opencv-python-headless<=4.11.0.86", # Required to avoid numpy version conflict with vllm "compressed_tensors>=0.11.0", diff --git a/requirements.txt b/requirements.txt index ad77d732..f14d584f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,7 @@ setuptools-scm>=8 torch==2.9.0 torchvision wheel +xgrammar>=0.1.30 pandas-stubs opencv-python-headless<=4.11.0.86 # Required to avoid numpy version conflict with vllm compressed_tensors>=0.11.0