[Bugfix] Fix structured outputs errors: TypeError: apply_token_bitmask_inplace_cpu() (#6151)

### What this PR does / why we need it?
Fix https://github.com/vllm-project/vllm-ascend/issues/5524

- vLLM version: v0.13.0
- vLLM main:
d68209402d

Signed-off-by: wjunLu <wjunlu217@gmail.com>
This commit is contained in:
wjunLu
2026-01-23 09:52:55 +08:00
committed by GitHub
parent 08a45e6053
commit 72ffc00b86
3 changed files with 3 additions and 2 deletions

View File

@@ -99,8 +99,7 @@ jobs:
pytest -sv --durations=0 tests/e2e/singlecard/test_camem.py
pytest -sv --durations=0 tests/e2e/singlecard/test_completion_with_prompt_embeds.py
pytest -sv --durations=0 tests/e2e/singlecard/test_cpu_offloading.py
# xgrammar has a parameter-mismatch bug; please follow: https://github.com/vllm-project/vllm-ascend/issues/5524
# pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py
pytest -sv --durations=0 tests/e2e/singlecard/test_guided_decoding.py
pytest -sv --durations=0 tests/e2e/singlecard/test_ilama_lora.py
pytest -sv --durations=0 tests/e2e/singlecard/test_llama32_lora.py
pytest -sv --durations=0 tests/e2e/singlecard/test_qwen3_multi_loras.py

View File

@@ -25,6 +25,7 @@ requires = [
"msgpack",
"quart",
"numba",
"xgrammar>=0.1.30",
"fastapi<0.124.0",
"opencv-python-headless<=4.11.0.86", # Required to avoid numpy version conflict with vllm
"compressed_tensors>=0.11.0",

View File

@@ -14,6 +14,7 @@ setuptools-scm>=8
torch==2.9.0
torchvision
wheel
xgrammar>=0.1.30
pandas-stubs
opencv-python-headless<=4.11.0.86 # Required to avoid numpy version conflict with vllm
compressed_tensors>=0.11.0