Support Alibaba-NLP/gte-Qwen2-7B-instruct embedding model (#1186)

Co-authored-by: Ying Sheng <sqy1415@gmail.com>
This commit is contained in:
Chayenne
2024-08-26 01:29:12 +08:00
committed by GitHub
parent 66e7dcaf70
commit 30b4f771b0
15 changed files with 167 additions and 55 deletions

View File

@@ -94,7 +94,10 @@ class TokenizerManager:
trust_remote_code=server_args.trust_remote_code,
model_overide_args=model_overide_args,
)
self.is_generation = is_generation_model(self.hf_config.architectures)
self.is_generation = is_generation_model(
self.hf_config.architectures, self.server_args.is_embedding
)
if server_args.context_length is not None:
self.context_len = server_args.context_length