From 22e00eeb4a4cb3a81e930619649234e34e8bd5fd Mon Sep 17 00:00:00 2001 From: Shangming Cai Date: Mon, 28 Jul 2025 00:17:51 +0800 Subject: [PATCH] [Bugfix] Prevent PD server crash from invalid grammar (#8062) Signed-off-by: Shangming Cai --- .../disaggregation/decode_schedule_batch_mixin.py | 14 +++++++++++++- python/sglang/srt/disaggregation/prefill.py | 14 +++++++++++++- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/disaggregation/decode_schedule_batch_mixin.py b/python/sglang/srt/disaggregation/decode_schedule_batch_mixin.py index e1d6f61cc..3edc6b4f6 100644 --- a/python/sglang/srt/disaggregation/decode_schedule_batch_mixin.py +++ b/python/sglang/srt/disaggregation/decode_schedule_batch_mixin.py @@ -1,10 +1,12 @@ from __future__ import annotations import logging +from http import HTTPStatus from typing import TYPE_CHECKING import torch +from sglang.srt.disaggregation.utils import prepare_abort from sglang.srt.model_executor.forward_batch_info import CaptureHiddenMode, ForwardMode from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo @@ -102,7 +104,17 @@ class ScheduleBatchDisaggregationDecodeMixin: self.output_ids.append(req.output_ids[-1]) self.tree_cache.cache_unfinished_req(req) if req.grammar is not None: - req.grammar.accept_token(req.output_ids[-1]) + # FIXME: this try-except block is for handling unexpected xgrammar issue. + try: + req.grammar.accept_token(req.output_ids[-1]) + except ValueError as e: + # Grammar accept_token can raise ValueError if the token is not in the grammar. + # This can happen if the grammar is not set correctly or the token is invalid. + error_message = f"Grammar accept_token failed for req {req.rid} with token {req.output_ids[-1]}: {e}" + self.tree_cache.cache_finished_req(req) + prepare_abort( + req, error_message, status_code=HTTPStatus.INTERNAL_SERVER_ERROR + ) req.grammar.finished = req.finished() self.output_ids = torch.tensor(self.output_ids, device=self.device) diff --git a/python/sglang/srt/disaggregation/prefill.py b/python/sglang/srt/disaggregation/prefill.py index bf61644cf..8217bd44c 100644 --- a/python/sglang/srt/disaggregation/prefill.py +++ b/python/sglang/srt/disaggregation/prefill.py @@ -425,7 +425,19 @@ class SchedulerDisaggregationPrefillMixin: self.send_kv_chunk(req, last_chunk=True) if req.grammar is not None: - req.grammar.accept_token(next_token_id) + # FIXME: this try-except block is for handling unexpected xgrammar issue. + try: + req.grammar.accept_token(next_token_id) + except ValueError as e: + # Grammar accept_token can raise ValueError if the token is not in the grammar. + # This can happen if the grammar is not set correctly or the token is invalid. + error_message = f"Grammar accept_token failed for req {req.rid} with token {next_token_id}: {e}" + self.tree_cache.cache_finished_req(req) + prepare_abort( + req, + error_message, + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + ) req.grammar.finished = req.finished() else: # being chunked reqs' prefill is not finished