Support limiting the maximum number of LoRA adapters loaded in CPU memory. (#8650)
This commit is contained in:
@@ -149,6 +149,7 @@ class ServerArgs:
|
||||
max_lora_rank: Optional[int] = None
|
||||
lora_target_modules: Optional[Union[set[str], List[str]]] = None
|
||||
lora_paths: Optional[Union[dict[str, str], dict[str, LoRARef], List[str]]] = None
|
||||
max_loaded_loras: Optional[int] = None
|
||||
max_loras_per_batch: int = 8
|
||||
lora_backend: str = "triton"
|
||||
|
||||
@@ -1237,6 +1238,12 @@ class ServerArgs:
|
||||
default=8,
|
||||
help="Maximum number of adapters for a running batch, include base-only request.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-loaded-loras",
|
||||
type=int,
|
||||
default=ServerArgs.max_loaded_loras,
|
||||
help="If specified, it limits the maximum number of LoRA adapters loaded in CPU memory at a time. The value must be greater than or equal to `--max-loras-per-batch`.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lora-backend",
|
||||
type=str,
|
||||
@@ -2008,6 +2015,19 @@ class ServerArgs:
|
||||
self.max_lora_rank and self.lora_target_modules
|
||||
), "When no initial --lora-paths is provided, you need to specify both --max-lora-rank and --lora-target-modules for LoRA initialization."
|
||||
|
||||
# Validate max_loaded_loras: it must be >= max_loras_per_batch and >= the number of initial lora_paths.
|
||||
if self.max_loaded_loras is not None:
|
||||
assert self.max_loaded_loras >= self.max_loras_per_batch, (
|
||||
"max_loaded_loras should be greater than or equal to max_loras_per_batch. "
|
||||
f"max_loaded_loras={self.max_loaded_loras}, max_loras_per_batch={self.max_loras_per_batch}"
|
||||
)
|
||||
assert (
|
||||
not self.lora_paths or len(self.lora_paths) <= self.max_loaded_loras
|
||||
), (
|
||||
"The number of LoRA paths should not exceed max_loaded_loras. "
|
||||
f"max_loaded_loras={self.max_loaded_loras}, lora_paths={len(self.lora_paths)}"
|
||||
)
|
||||
|
||||
def validate_disagg_tp_size(self, prefill_tp: int, decode_tp: int):
|
||||
larger_tp = max(decode_tp, prefill_tp)
|
||||
smaller_tp = min(decode_tp, prefill_tp)
|
||||
|
||||
Reference in New Issue
Block a user