[CI] balance unit tests (#1977)

This commit is contained in:
Lianmin Zheng
2024-11-09 16:46:14 -08:00
committed by GitHub
parent 9c939a3d8b
commit 520f0094e4
4 changed files with 25 additions and 16 deletions

View File

@@ -114,9 +114,16 @@ async def health() -> Response:
@app.get("/health_generate")
async def health_generate(request: Request) -> Response:
"""Check the health of the inference server by generating one token."""
gri = GenerateReqInput(
text="s", sampling_params={"max_new_tokens": 1, "temperature": 0.7}
)
if tokenizer_manager.is_generation:
gri = GenerateReqInput(
input_ids=[0], sampling_params={"max_new_tokens": 1, "temperature": 0.7}
)
else:
gri = EmbeddingReqInput(
input_ids=[0], sampling_params={"max_new_tokens": 1, "temperature": 0.7}
)
try:
async for _ in tokenizer_manager.generate_request(gri, request):
break