Fix typo: infight -> inflight (#5357)

This commit is contained in:
Liangsheng Yin
2025-04-14 01:25:30 +08:00
committed by GitHub
parent f58b929a51
commit f765579046
2 changed files with 14 additions and 14 deletions

View File

@@ -10,9 +10,9 @@ Life cycle of a request in the prefill server
2. Waiting Queue
a. Use PrefillAdder to pop requests
b. Run forward
c. Add the request to Infight Queue
c. Add the request to Inflight Queue
3. Infight Queue
3. Inflight Queue
a. Poll (non-blocking) the sender of the request
b. Once the transfer has finished, return the request
"""
@@ -162,7 +162,7 @@ class SchedulerDisaggregationPrefillMixin:
self: Scheduler, batch: ScheduleBatch, result: GenerationBatchResult
) -> None:
"""
Transfer kv for prefill completed requests and add it into disagg_prefill_infight_queue
Transfer kv for prefill completed requests and add it into disagg_prefill_inflight_queue
Adapted from process_batch_result_prefill
"""
@@ -175,7 +175,7 @@ class SchedulerDisaggregationPrefillMixin:
req.output_ids.append(next_token_id)
self.tree_cache.cache_unfinished_req(req) # update the tree and lock
self.send_kv_chunk(req, token_id=next_token_id)
self.disagg_prefill_infight_queue.append(req)
self.disagg_prefill_inflight_queue.append(req)
else:
# being chunked reqs' prefill is not finished
req.is_chunked -= 1
@@ -187,22 +187,22 @@ class SchedulerDisaggregationPrefillMixin:
self.current_stream.synchronize()
batch.next_batch_sampling_info.sampling_info_done.set()
def process_disagg_prefill_infight_queue(self: Scheduler) -> None:
def process_disagg_prefill_inflight_queue(self: Scheduler) -> None:
"""
Poll the requests in the middle of transfer. If done, return the request.
"""
assert len(self.disagg_prefill_infight_queue) > 0
assert len(self.disagg_prefill_inflight_queue) > 0
done_reqs = []
polls = poll_and_all_reduce(
[req.disagg_kv_sender for req in self.disagg_prefill_infight_queue],
[req.disagg_kv_sender for req in self.disagg_prefill_inflight_queue],
self.tp_worker.get_tp_cpu_group(),
)
undone_reqs: List[Req] = []
# Check .poll() for the reqs in disagg_prefill_infight_queue. If Success, respond to the client and remove it from the queue
for req, poll in zip(self.disagg_prefill_infight_queue, polls):
# Check .poll() for the reqs in disagg_prefill_inflight_queue. If Success, respond to the client and remove it from the queue
for req, poll in zip(self.disagg_prefill_inflight_queue, polls):
if poll in [KVPoll.WaitingForInput, KVPoll.Transferring]:
undone_reqs.append(req)
elif poll == KVPoll.Success: # transfer done
@@ -215,7 +215,7 @@ class SchedulerDisaggregationPrefillMixin:
# Stream requests which have finished transfer
self.stream_output(done_reqs, False, None)
self.disagg_prefill_infight_queue = undone_reqs
self.disagg_prefill_inflight_queue = undone_reqs
def process_prefill_chunk(self: Scheduler) -> None:
if self.last_batch and self.last_batch.forward_mode.is_extend():

View File

@@ -594,7 +594,7 @@ class Scheduler(
gloo_group=self.tp_worker.get_attention_tp_cpu_group(),
)
# The prefill requests that are in the middle of kv sending
self.disagg_prefill_infight_queue: List[Req] = []
self.disagg_prefill_inflight_queue: List[Req] = []
@DynamicGradMode()
def event_loop_normal(self):
@@ -674,10 +674,10 @@ class Scheduler(
result = self.run_batch(batch)
self.process_batch_result_disagg_prefill(batch, result)
if len(self.disagg_prefill_infight_queue) > 0:
self.process_disagg_prefill_infight_queue()
if len(self.disagg_prefill_inflight_queue) > 0:
self.process_disagg_prefill_inflight_queue()
if batch is None and len(self.disagg_prefill_infight_queue) == 0:
if batch is None and len(self.disagg_prefill_inflight_queue) == 0:
self.check_memory()
self.new_token_ratio = self.init_new_token_ratio