From 538fa0ae135c4e7ef70c65439359eff7bec2b616 Mon Sep 17 00:00:00 2001
From: Yixin Dong
Date: Mon, 25 Nov 2024 04:31:25 -0500
Subject: [PATCH] [Fix] Avoid calling fill_vocab_mask for terminated requests (#2175)

---
 python/sglang/srt/sampling/sampling_batch_info.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/python/sglang/srt/sampling/sampling_batch_info.py b/python/sglang/srt/sampling/sampling_batch_info.py
index 3948ed069..1624fd255 100644
--- a/python/sglang/srt/sampling/sampling_batch_info.py
+++ b/python/sglang/srt/sampling/sampling_batch_info.py
@@ -170,7 +170,10 @@ class SamplingBatchInfo:
 
         for i, grammar in enumerate(self.grammars):
             if grammar is not None:
-                grammar.fill_vocab_mask(self.vocab_mask, i)
+                try:
+                    grammar.fill_vocab_mask(self.vocab_mask, i)
+                except RuntimeError:
+                    continue
 
     def filter_batch(self, unfinished_indices: List[int], new_indices: torch.Tensor):
         self.penalizer_orchestrator.filter(unfinished_indices, new_indices)