add pkgs
This commit is contained in:
90
examples/vllm_test/run_stats_server.py
Normal file
90
examples/vllm_test/run_stats_server.py
Normal file
@@ -0,0 +1,90 @@
|
||||
import re
|
||||
import sys
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
# Summarise throughput / latency metrics found in a vLLM server log.
#
# Usage: python run_stats_server.py [LOG_FILE]
# When LOG_FILE is omitted, DEFAULT_LOG_PATH is read.

# Log file analysed when no path is given on the command line.
DEFAULT_LOG_PATH = "server.log.valid"  # replace with your own file path

# Readings whose magnitude is not above this threshold are skipped, so that
# zero-valued (idle) samples do not drag the statistics down.
ZERO_EPSILON = 1e-5

# Row labels of the resulting table, in display order.
METRIC_NAMES = (
    'first_token_times',
    'prompt_throughput',
    'generation_throughput',
    'running',
)

# Column labels of the resulting table, in display order.
STAT_NAMES = ('avg', 'max', 'min', 'p10', 'p90', 'p99', 'num')

# A plain decimal number. The stricter form (instead of "[0-9.]+") stops at
# the end of the number, so text such as "7.5." or "..." can no longer be
# captured and crash float().
NUMBER_PATTERN = r"(\d+\.?\d*|\.\d+)"

# Compiled once at module level instead of once per log line.
FIRST_TOKEN_RE = re.compile(r"Avg First Token times:" + NUMBER_PATTERN)
PROMPT_THROUGHPUT_RE = re.compile(r"Avg prompt throughput: " + NUMBER_PATTERN)
GENERATION_THROUGHPUT_RE = re.compile(
    r"Avg generation throughput: " + NUMBER_PATTERN
)
RUNNING_RE = re.compile(r"Running: (\d+)")


def parse_log_lines(lines):
    """Collect the metric samples found in an iterable of log lines.

    Args:
        lines: Any iterable of strings (an open text file works).

    Returns:
        A dict mapping each name in METRIC_NAMES to the list of values
        extracted for it, in the order they were encountered.
    """
    first_token_times_values = []
    prompt_throughput_values = []
    generation_throughput_values = []
    running_values = []

    for line in lines:
        match_first_token = FIRST_TOKEN_RE.search(line)
        match_prompt_throughput = PROMPT_THROUGHPUT_RE.search(line)
        match_generation_throughput = GENERATION_THROUGHPUT_RE.search(line)
        match_running = RUNNING_RE.search(line)

        # Avg First Token times
        if match_first_token:
            first_token_times = float(match_first_token.group(1))
            if abs(first_token_times) > ZERO_EPSILON:
                first_token_times_values.append(first_token_times)

        # Avg prompt throughput
        if match_prompt_throughput:
            prompt_throughput = float(match_prompt_throughput.group(1))
            if abs(prompt_throughput) > ZERO_EPSILON:
                prompt_throughput_values.append(prompt_throughput)

        # Avg generation throughput and Running are recorded as a pair:
        # the running count is only kept for lines whose generation
        # throughput is non-zero, so both lists stay the same length.
        if match_generation_throughput and match_running:
            generation_throughput = float(match_generation_throughput.group(1))
            running = int(match_running.group(1))
            if abs(generation_throughput) > ZERO_EPSILON:
                generation_throughput_values.append(generation_throughput)
                running_values.append(running)

    return {
        'first_token_times': first_token_times_values,
        'prompt_throughput': prompt_throughput_values,
        'generation_throughput': generation_throughput_values,
        'running': running_values,
    }


def summarize(values):
    """Return the summary statistics of one metric.

    Args:
        values: A list of numeric samples (may be empty).

    Returns:
        A dict keyed by STAT_NAMES. Every entry is 0 when ``values`` is
        empty, because NumPy reductions are undefined on empty input.
    """
    if not values:
        return dict.fromkeys(STAT_NAMES, 0)
    return {
        'avg': np.mean(values),
        'max': np.max(values),
        'min': np.min(values),
        'p10': np.percentile(values, 10),
        'p90': np.percentile(values, 90),
        'p99': np.percentile(values, 99),
        'num': len(values),
    }


def build_stats_frame(samples):
    """Build the metrics-by-statistics table.

    Args:
        samples: A dict as returned by ``parse_log_lines``.

    Returns:
        A DataFrame with one row per name in METRIC_NAMES and one column
        per name in STAT_NAMES.
    """
    summaries = [summarize(samples[name]) for name in METRIC_NAMES]
    data = {
        stat: [summary[stat] for summary in summaries]
        for stat in STAT_NAMES
    }
    return pd.DataFrame(data, index=list(METRIC_NAMES))


def main(argv=None):
    """Read the log file, compute the statistics and print the table.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). Defaults to ``sys.argv[1:]``. The first argument, when
            present, is the log file path; otherwise DEFAULT_LOG_PATH is used.
    """
    args = sys.argv[1:] if argv is None else argv
    file_path = args[0] if args else DEFAULT_LOG_PATH

    # Undecodable bytes are replaced rather than raising: only the ASCII
    # metric markers matter, and a stray byte should not abort the run.
    with open(file_path, 'r', encoding="utf-8", errors="replace") as file:
        samples = parse_log_lines(file)

    df = build_stats_frame(samples)

    # Display the DataFrame
    print(df)


if __name__ == "__main__":
    main()
|
||||
|
||||
Reference in New Issue
Block a user