[Feature] Initial support for multi-LoRA serving (#1307)

This commit is contained in:
Ying Sheng
2024-09-12 16:46:14 -07:00
committed by GitHub
parent c33d82a211
commit 712216928f
21 changed files with 1435 additions and 22 deletions

View File

@@ -76,6 +76,7 @@ class TestGenerationModels(unittest.TestCase):
) -> None:
if model_path == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
prompts = prompts[:-1]
with HFRunner(
model_path, torch_dtype=torch_dtype, is_generation=True
) as hf_runner: