[Bugfix] Fix structured outputs errors: TypeError: apply_token_bitmask_inplace_cpu() (#6151)

### What this PR does / why we need it?
Fix https://github.com/vllm-project/vllm-ascend/issues/5524

- vLLM version: v0.13.0
- vLLM main:
d68209402d

Signed-off-by: wjunLu <wjunlu217@gmail.com>
This commit is contained in:
wjunLu
2026-01-23 09:52:55 +08:00
committed by GitHub
parent 08a45e6053
commit 72ffc00b86
3 changed files with 3 additions and 2 deletions

View File

@@ -99,8 +99,7 @@ jobs:
pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
pytest -sv --durations=0 tests/e2e/singlecard/test_completion_with_prompt_embeds.py
pytest -sv --durations=0 tests/e2e/singlecard/test_cpu_offloading.py
# xgrammar has a parameter-mismatch bug; please follow: https://github.com/vllm-project/vllm-ascend/issues/5524
# pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py
pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py
pytest -sv --durations=0 tests/e2e/singlecard/test_ilama_lora.py
pytest -sv --durations=0 tests/e2e/singlecard/test_llama32_lora.py
pytest -sv --durations=0 tests/e2e/singlecard/test_qwen3_multi_loras.py

View File

@@ -25,6 +25,7 @@ requires = [
"msgpack",
"quart",
"numba",
"xgrammar>=0.1.30",
"fastapi<0.124.0",
"opencv-python-headless<=4.11.0.86", # Required to avoid numpy version conflict with vllm
"compressed_tensors>=0.11.0",

View File

@@ -14,6 +14,7 @@ setuptools-scm>=8
torch==2.9.0
torchvision
wheel
xgrammar>=0.1.30
pandas-stubs
opencv-python-headless<=4.11.0.86 # Required to avoid numpy version conflict with vllm
compressed_tensors>=0.11.0