Support GPU pinning for LoRA (#8697)

This commit is contained in:
Lifu Huang
2025-08-06 19:39:45 -07:00
committed by GitHub
parent 6ad6c8c9e6
commit 6210e2c4f0
13 changed files with 425 additions and 134 deletions

View File

@@ -492,12 +492,13 @@ class Engine(EngineBase):
self.tokenizer_manager.get_weights_by_name(obj, None)
)
def load_lora_adapter(self, lora_name: str, lora_path: str):
def load_lora_adapter(self, lora_name: str, lora_path: str, pinned: bool = False):
"""Load a new LoRA adapter without re-launching the engine."""
obj = LoadLoRAAdapterReqInput(
lora_name=lora_name,
lora_path=lora_path,
pinned=pinned,
)
loop = asyncio.get_event_loop()