Support GPU pinning for LoRA (#8697)

This commit is contained in:
Lifu Huang
2025-08-06 19:39:45 -07:00
committed by GitHub
parent 6ad6c8c9e6
commit 6210e2c4f0
13 changed files with 425 additions and 134 deletions

View File

@@ -1129,6 +1129,7 @@ class TokenizerManager:
new_adapter = LoRARef(
lora_name=obj.lora_name,
lora_path=obj.lora_path,
+pinned=obj.pinned,
)
# Trigger the actual loading operation at the backend processes.
@@ -1186,7 +1187,7 @@ class TokenizerManager:
return result
except ValueError as e:
-return UnloadLoRAAdapterReqOutput(success=False, rror_message=str(e))
+return UnloadLoRAAdapterReqOutput(success=False, error_message=str(e))
async def get_weights_by_name(
self, obj: GetWeightsByNameReqInput, request: Optional[fastapi.Request] = None