Support GPU pinning for LoRA (#8697)

This commit is contained in:
Lifu Huang
2025-08-06 19:39:45 -07:00
committed by GitHub
parent 6ad6c8c9e6
commit 6210e2c4f0
13 changed files with 425 additions and 134 deletions

View File

@@ -1538,14 +1538,11 @@ class Scheduler(
# Get requests from the waiting queue to a new prefill batch
for req in self.waiting_queue:
- if (
- self.enable_lora
- and len(
- lora_set
- | set([req.lora_id for req in adder.can_run_list])
- | set([req.lora_id])
- )
- > self.max_loras_per_batch
+ if self.enable_lora and not self.tp_worker.can_run_lora_batch(
+ lora_set
+ | set([req.lora_id for req in adder.can_run_list])
+ | set([req.lora_id])
):
self.running_batch.batch_is_full = True
break