[Performance] Update xgrammar-related constrained decoding (#2056)

This commit is contained in:
DarkSharpness
2024-11-18 09:58:49 +09:00
committed by GitHub
parent ebaa2f3199
commit 9c745d078e
4 changed files with 47 additions and 23 deletions

View File

@@ -645,7 +645,7 @@ class ModelRunner:
# Apply regex vocab_mask
if sampling_info.vocab_mask is not None:
-logits = logits.masked_fill(sampling_info.vocab_mask, float("-inf"))
+sampling_info.apply_mask(logits=logits, vocab_mask=sampling_info.vocab_mask)
return logits