Improve benchmark scripts and error message printing (#2922)

This commit is contained in:
Lianmin Zheng
2025-01-16 06:24:31 -08:00
committed by GitHub
parent 7596417732
commit 8f2c522aba
8 changed files with 125 additions and 70 deletions

View File

@@ -135,9 +135,13 @@ async def health_generate(request: Request) -> Response:
     sampling_params = {"max_new_tokens": 1, "temperature": 0.7}
     if tokenizer_manager.is_generation:
-        gri = GenerateReqInput(input_ids=[0], sampling_params=sampling_params)
+        gri = GenerateReqInput(
+            input_ids=[0], sampling_params=sampling_params, log_metrics=False
+        )
     else:
-        gri = EmbeddingReqInput(input_ids=[0], sampling_params=sampling_params)
+        gri = EmbeddingReqInput(
+            input_ids=[0], sampling_params=sampling_params, log_metrics=False
+        )
     try:
         async for _ in tokenizer_manager.generate_request(gri, request):