Support GPU pinning for LoRA (#8697)
This commit is contained in:
@@ -1129,6 +1129,7 @@ class TokenizerManager:
|
||||
new_adapter = LoRARef(
|
||||
lora_name=obj.lora_name,
|
||||
lora_path=obj.lora_path,
|
||||
+ pinned=obj.pinned,
|
||||
)
|
||||
|
||||
# Trigger the actual loading operation at the backend processes.
|
||||
@@ -1186,7 +1187,7 @@ class TokenizerManager:
|
||||
|
||||
return result
|
||||
except ValueError as e:
|
||||
- return UnloadLoRAAdapterReqOutput(success=False, rror_message=str(e))
|
||||
+ return UnloadLoRAAdapterReqOutput(success=False, error_message=str(e))
|
||||
|
||||
async def get_weights_by_name(
|
||||
self, obj: GetWeightsByNameReqInput, request: Optional[fastapi.Request] = None
|
||||
|
||||
Reference in New Issue
Block a user