Refine the add request reasons to avoid corner cases. (#1574)

This commit is contained in:
Liangsheng Yin
2024-10-04 18:00:18 -07:00
committed by GitHub
parent 04b262cd91
commit 5d0ba4038f
2 changed files with 34 additions and 25 deletions

View File

@@ -50,7 +50,11 @@ from sglang.srt.managers.schedule_batch import (
Req,
ScheduleBatch,
)
from sglang.srt.managers.schedule_policy import PrefillAdder, SchedulePolicy
from sglang.srt.managers.schedule_policy import (
AddReqResult,
PrefillAdder,
SchedulePolicy,
)
from sglang.srt.managers.tp_worker import TpModelWorker
from sglang.srt.mem_cache.chunk_cache import ChunkCache
from sglang.srt.mem_cache.radix_cache import RadixCache
@@ -493,16 +497,15 @@ class Scheduler:
self.batch_is_full = True
break
if adder.no_remaining_tokens():
if running_bs + len(adder.can_run_list) >= self.max_running_requests:
self.batch_is_full = True
break
req.init_next_round_input(None if prefix_computed else self.tree_cache)
res = adder.add_one_req(req)
if (
not res
or running_bs + len(adder.can_run_list) >= self.max_running_requests
):
self.batch_is_full = True
if res != AddReqResult.CONTINUE:
if res == AddReqResult.NO_TOKEN:
self.batch_is_full = True
break
can_run_list = adder.can_run_list