[Performance] Update xgrammar-related constrained decoding (#2056)
This commit is contained in:
@@ -645,7 +645,7 @@ class ModelRunner:
|
||||
|
||||
# Apply regex vocab_mask
|
||||
if sampling_info.vocab_mask is not None:
|
||||
- logits = logits.masked_fill(sampling_info.vocab_mask, float("-inf"))
|
||||
+ sampling_info.apply_mask(logits=logits, vocab_mask=sampling_info.vocab_mask)
|
||||
|
||||
return logits
|
||||
|
||||
|
||||
Reference in New Issue
Block a user