[Feature] Initial support for multi-LoRA serving (#1307)

2024-09-12 16:46:14 -07:00
parent c33d82a211
commit 712216928f
21 changed files with 1435 additions and 22 deletions
--- a/test/srt/models/test_generation_models.py
+++ b/test/srt/models/test_generation_models.py
@@ -76,6 +76,7 @@ class TestGenerationModels(unittest.TestCase):
    ) -> None:
        if model_path == "Alibaba-NLP/gte-Qwen2-1.5B-instruct":
            prompts = prompts[:-1]
+
        with HFRunner(
            model_path, torch_dtype=torch_dtype, is_generation=True
        ) as hf_runner: