[Refactor] Remove Hicache Load & Write threads (#10127)

Co-authored-by: Zhiqiang Xie <xiezhq@stanford.edu>
This commit is contained in:
DarkSharpness
2025-09-08 22:18:50 -07:00
committed by GitHub
parent cdc56ef6c1
commit 948b01a04c
10 changed files with 215 additions and 204 deletions

View File

@@ -1807,10 +1807,6 @@ class Scheduler(
if self.spec_algorithm.is_none():
model_worker_batch = batch.get_model_worker_batch()
# update the consumer index of hicache to the running batch
self.tp_worker.set_hicache_consumer(
model_worker_batch.hicache_consumer_index
)
if self.pp_group.is_last_rank:
logits_output, next_token_ids, can_run_cuda_graph = (
self.tp_worker.forward_batch_generation(model_worker_batch)