Support GPU pinning for LoRA (#8697)

This commit is contained in:
Lifu Huang
2025-08-06 19:39:45 -07:00
committed by GitHub
parent 6ad6c8c9e6
commit 6210e2c4f0
13 changed files with 425 additions and 134 deletions

View File

@@ -2067,21 +2067,23 @@ class ServerArgs:
if self.enable_lora:
# Normalize lora_paths to a dictionary if it is a list.
# TODO (lifuhuang): support specifying pinned adapters in server_args.
if isinstance(self.lora_paths, list):
lora_paths = self.lora_paths
self.lora_paths = {}
for lora_path in lora_paths:
if "=" in lora_path:
name, path = lora_path.split("=", 1)
self.lora_paths[name] = LoRARef(lora_name=name, lora_path=path)
self.lora_paths[name] = LoRARef(
lora_name=name, lora_path=path, pinned=False
)
else:
self.lora_paths[lora_path] = LoRARef(
lora_name=lora_path,
lora_path=lora_path,
lora_name=lora_path, lora_path=lora_path, pinned=False
)
elif isinstance(self.lora_paths, dict):
self.lora_paths = {
k: LoRARef(lora_name=k, lora_path=v)
k: LoRARef(lora_name=k, lora_path=v, pinned=False)
for k, v in self.lora_paths.items()
}
elif self.lora_paths is None: