fix: force max new tokens to be 1 for embedding request (#1019)

This commit is contained in:
Ying Sheng
2024-08-10 13:46:42 -07:00
committed by GitHub
parent e712837d38
commit b68c4c073b
2 changed files with 8 additions and 5 deletions

View File

@@ -44,7 +44,9 @@ class TestEmbeddingModels(unittest.TestCase):
torch_dtype=torch_dtype,
is_generation_model=False,
) as srt_runner:
-            srt_outputs = srt_runner.forward(prompts)
+            srt_outputs = srt_runner.forward(
+                prompts,
+            )
for i in range(len(prompts)):
hf_logits = torch.Tensor(hf_outputs.embed_logits[i])