[Fix] Avoid calling fill_vocab_mask for terminated requests (#2175)
This commit is contained in:
@@ -170,7 +170,10 @@ class SamplingBatchInfo:
|
|||||||
|
|
||||||
for i, grammar in enumerate(self.grammars):
|
for i, grammar in enumerate(self.grammars):
|
||||||
if grammar is not None:
|
if grammar is not None:
|
||||||
|
try:
|
||||||
grammar.fill_vocab_mask(self.vocab_mask, i)
|
grammar.fill_vocab_mask(self.vocab_mask, i)
|
||||||
|
except RuntimeError:
|
||||||
|
continue
|
||||||
|
|
||||||
def filter_batch(self, unfinished_indices: List[int], new_indices: torch.Tensor):
|
def filter_batch(self, unfinished_indices: List[int], new_indices: torch.Tensor):
|
||||||
self.penalizer_orchestrator.filter(unfinished_indices, new_indices)
|
self.penalizer_orchestrator.filter(unfinished_indices, new_indices)
|
||||||
|
|||||||
Reference in New Issue
Block a user