From 22e00eeb4a4cb3a81e930619649234e34e8bd5fd Mon Sep 17 00:00:00 2001
From: Shangming Cai <caishangming@linux.alibaba.com>
Date: Mon, 28 Jul 2025 00:17:51 +0800
Subject: [PATCH] [Bugfix] Prevent PD server crash from invalid grammar (#8062)

Signed-off-by: Shangming Cai <caishangming@linux.alibaba.com>
---
 .../disaggregation/decode_schedule_batch_mixin.py  | 14 +++++++++++++-
 python/sglang/srt/disaggregation/prefill.py        | 14 +++++++++++++-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/python/sglang/srt/disaggregation/decode_schedule_batch_mixin.py b/python/sglang/srt/disaggregation/decode_schedule_batch_mixin.py
index e1d6f61cc..3edc6b4f6 100644
--- a/python/sglang/srt/disaggregation/decode_schedule_batch_mixin.py
+++ b/python/sglang/srt/disaggregation/decode_schedule_batch_mixin.py
@@ -1,10 +1,12 @@
 from __future__ import annotations
 
 import logging
+from http import HTTPStatus
 from typing import TYPE_CHECKING
 
 import torch
 
+from sglang.srt.disaggregation.utils import prepare_abort
 from sglang.srt.model_executor.forward_batch_info import CaptureHiddenMode, ForwardMode
 from sglang.srt.sampling.sampling_batch_info import SamplingBatchInfo
 
@@ -102,7 +104,17 @@ class ScheduleBatchDisaggregationDecodeMixin:
             self.output_ids.append(req.output_ids[-1])
             self.tree_cache.cache_unfinished_req(req)
             if req.grammar is not None:
-                req.grammar.accept_token(req.output_ids[-1])
+                # FIXME: this try-except block works around an unexpected xgrammar issue.
+                try:
+                    req.grammar.accept_token(req.output_ids[-1])
+                except ValueError as e:
+                    # Grammar accept_token can raise ValueError if the token is not in the grammar.
+                    # This can happen if the grammar is not set correctly or the token is invalid.
+                    error_message = f"Grammar accept_token failed for req {req.rid} with token {req.output_ids[-1]}: {e}"
+                    self.tree_cache.cache_finished_req(req)
+                    prepare_abort(
+                        req, error_message, status_code=HTTPStatus.INTERNAL_SERVER_ERROR
+                    )
                 req.grammar.finished = req.finished()
         self.output_ids = torch.tensor(self.output_ids, device=self.device)
 
diff --git a/python/sglang/srt/disaggregation/prefill.py b/python/sglang/srt/disaggregation/prefill.py
index bf61644cf..8217bd44c 100644
--- a/python/sglang/srt/disaggregation/prefill.py
+++ b/python/sglang/srt/disaggregation/prefill.py
@@ -425,7 +425,19 @@ class SchedulerDisaggregationPrefillMixin:
                 self.send_kv_chunk(req, last_chunk=True)
 
                 if req.grammar is not None:
-                    req.grammar.accept_token(next_token_id)
+                    # FIXME: this try-except block works around an unexpected xgrammar issue.
+                    try:
+                        req.grammar.accept_token(next_token_id)
+                    except ValueError as e:
+                        # Grammar accept_token can raise ValueError if the token is not in the grammar.
+                        # This can happen if the grammar is not set correctly or the token is invalid.
+                        error_message = f"Grammar accept_token failed for req {req.rid} with token {next_token_id}: {e}"
+                        self.tree_cache.cache_finished_req(req)
+                        prepare_abort(
+                            req,
+                            error_message,
+                            status_code=HTTPStatus.INTERNAL_SERVER_ERROR,
+                        )
                     req.grammar.finished = req.finished()
             else:
                 # being chunked reqs' prefill is not finished