diff --git a/python/sglang/srt/managers/schedule_policy.py b/python/sglang/srt/managers/schedule_policy.py index 777ecd343..3cd2be26b 100644 --- a/python/sglang/srt/managers/schedule_policy.py +++ b/python/sglang/srt/managers/schedule_policy.py @@ -468,6 +468,9 @@ class PrefillAdder: return AddReqResult.OTHER with self._lock_node(req.last_node): + if total_tokens > self.rem_total_tokens: + return AddReqResult.NO_TOKEN + if ( enable_hierarchical_cache and req.last_node_global is not None