[Feature] Initial support for multi-LoRA serving (#1307)

This commit is contained in:
Authored by Ying Sheng on 2024-09-12 16:46:14 -07:00; committed by GitHub
parent c33d82a211
commit 712216928f
21 changed files with 1435 additions and 22 deletions

View File

@@ -266,6 +266,11 @@ class TokenizerManager:
top_logprobs_num,
obj.stream,
modalities,
(
obj.lora_path[index]
if isinstance(obj.lora_path, list)
else obj.lora_path
),
)
else: # is embedding
tokenized_obj = TokenizedEmbeddingReqInput(
@@ -364,6 +369,11 @@ class TokenizerManager:
obj.top_logprobs_num[index],
obj.stream,
modalities,
(
obj.lora_path[index]
if isinstance(obj.lora_path, list)
else obj.lora_path
),
)
else:
tokenized_obj = TokenizedEmbeddingReqInput(