Misc fix for min_p_sampling, --cuda-graph-bs (#2761)
This commit is contained in:
@@ -532,6 +532,8 @@ def run_bench_serving(
|
||||
request_rate,
|
||||
other_server_args,
|
||||
dataset_name="random",
|
||||
dataset_path="",
|
||||
tokenizer=None,
|
||||
random_input_len=4096,
|
||||
random_output_len=2048,
|
||||
disable_stream=False,
|
||||
@@ -553,9 +555,9 @@ def run_bench_serving(
|
||||
host=None,
|
||||
port=None,
|
||||
dataset_name=dataset_name,
|
||||
dataset_path="",
|
||||
dataset_path=dataset_path,
|
||||
model=None,
|
||||
tokenizer=None,
|
||||
tokenizer=tokenizer,
|
||||
num_prompts=num_prompts,
|
||||
sharegpt_output_len=None,
|
||||
random_input_len=random_input_len,
|
||||
@@ -657,16 +659,16 @@ STDERR_FILENAME = "stderr.txt"
|
||||
STDOUT_FILENAME = "stdout.txt"
|
||||
|
||||
|
||||
def read_output(output_lines):
|
||||
def read_output(output_lines: List[str], filename: str = STDERR_FILENAME):
|
||||
"""Print the output in real time with another thread."""
|
||||
while not os.path.exists(STDERR_FILENAME):
|
||||
while not os.path.exists(filename):
|
||||
time.sleep(1)
|
||||
|
||||
pt = 0
|
||||
while pt >= 0:
|
||||
if pt > 0 and not os.path.exists(STDERR_FILENAME):
|
||||
if pt > 0 and not os.path.exists(filename):
|
||||
break
|
||||
lines = open(STDERR_FILENAME).readlines()
|
||||
lines = open(filename).readlines()
|
||||
for line in lines[pt:]:
|
||||
print(line, end="", flush=True)
|
||||
output_lines.append(line)
|
||||
@@ -747,6 +749,33 @@ def run_and_check_memory_leak(
|
||||
assert has_abort
|
||||
|
||||
|
||||
def run_command_and_capture_output(command, env: Optional[dict] = None):
    """Run ``command`` to completion and return the lines it wrote to stdout.

    The child's stdout and stderr are redirected to ``STDOUT_FILENAME`` and
    ``STDERR_FILENAME``. While the child runs, a background thread executes
    ``read_output`` on the stdout file, echoing it and collecting the lines.

    Args:
        command: The command passed unchanged to ``subprocess.Popen``.
        env: Optional environment mapping for the child process; ``None``
            is forwarded to ``subprocess.Popen`` as-is.

    Returns:
        The list of lines the reader thread collected from the stdout file.

    Cleanup (closing both files, deleting them, killing the process tree and
    joining the reader thread) now also runs when ``Popen`` or ``wait``
    raises, so a failed launch no longer leaks file handles or leaves stale
    log files behind.
    """
    output_lines = []
    process = None
    reader = None

    try:
        # The context manager closes both handles even if Popen/wait raises.
        with open(STDOUT_FILENAME, "w") as stdout, open(
            STDERR_FILENAME, "w"
        ) as stderr:
            process = subprocess.Popen(
                command, stdout=stdout, stderr=stderr, env=env, text=True
            )

            # Launch a thread to stream the output
            reader = threading.Thread(
                target=read_output, args=(output_lines, STDOUT_FILENAME)
            )
            reader.start()

            # Join the process
            process.wait()
    finally:
        # Remove the log files first: read_output stops polling once the file
        # it tails has disappeared, so this must happen before the join below.
        for path in (STDOUT_FILENAME, STDERR_FILENAME):
            if os.path.exists(path):
                os.remove(path)

        if process is not None:
            # Also reap anything the command spawned.
            kill_process_tree(process.pid)
        if reader is not None:
            reader.join()

    return output_lines
|
||||
|
||||
|
||||
def run_mmlu_test(
|
||||
disable_radix_cache=False,
|
||||
enable_mixed_chunk=False,
|
||||
|
||||
Reference in New Issue
Block a user