Fix popen_launch_server waiting up to 20 minutes after the child process has already exited (#4777)

This commit is contained in:
fzyzcjy
2025-03-26 15:32:19 +08:00
committed by GitHub
parent 0d658ac3df
commit fa3c9e0668

View File

@@ -1,7 +1,6 @@
"""Common utilities for testing and benchmarking"""
import argparse
import asyncio
import copy
import logging
import os
@@ -64,7 +63,6 @@ DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_QUANT_TP1 = "hugging-quants/Meta-Llama-3.1-8
DEFAULT_SMALL_MODEL_NAME_FOR_TEST_QWEN = "Qwen/Qwen2.5-1.5B-Instruct"
DEFAULT_SMALL_VLM_MODEL_NAME = "Qwen/Qwen2-VL-2B"
DEFAULT_EAGLE_TARGET_MODEL_FOR_TEST = "meta-llama/Llama-2-7b-chat-hf"
DEFAULT_EAGLE_DRAFT_MODEL_FOR_TEST = "lmsys/sglang-EAGLE-llama2-chat-7B"
@@ -435,6 +433,11 @@ def popen_launch_server(
return process
except requests.RequestException:
pass
return_code = process.poll()
if return_code is not None:
raise Exception(f"Server unexpectedly exits ({return_code=}).")
time.sleep(10)
kill_process_tree(process.pid)
@@ -888,7 +891,6 @@ def run_mulit_request_test(
enable_overlap=False,
chunked_prefill_size=32,
):
def workload_func(base_url, model):
def run_one(_):
prompt = """