feat: update experiment_runner (#5360)
This commit is contained in:
28
test/srt/configs/llama_405b.yaml
Normal file
28
test/srt/configs/llama_405b.yaml
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
# Benchmark task matrix for nvidia/Llama-3.1-405B-Instruct-FP8 served with
# tensor parallelism 8 (--tp 8).
#
# Every task runs the same 8192-token-input / 1024-token-output random
# workload against a freshly launched sglang server, sweeping
# --max-concurrency from 1 to 32.  --num-prompts grows with concurrency so
# each run issues enough requests to produce stable latency/throughput
# numbers.  All runs append to the same llama_405b_results.jsonl, which is
# later summarized by test/srt/parse_results.py.
tasks:
  - name: sglang-8192-1024-concurrency1
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 1 --num-prompts 5 --output-file llama_405b_results.jsonl

  - name: sglang-8192-1024-concurrency2
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 2 --num-prompts 10 --output-file llama_405b_results.jsonl

  - name: sglang-8192-1024-concurrency4
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 4 --num-prompts 20 --output-file llama_405b_results.jsonl

  - name: sglang-8192-1024-concurrency8
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 8 --num-prompts 32 --output-file llama_405b_results.jsonl

  - name: sglang-8192-1024-concurrency16
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 16 --num-prompts 48 --output-file llama_405b_results.jsonl

  - name: sglang-8192-1024-concurrency24
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 24 --num-prompts 72 --output-file llama_405b_results.jsonl

  - name: sglang-8192-1024-concurrency32
    server_cmd: python3 -m sglang.launch_server --model nvidia/Llama-3.1-405B-Instruct-FP8 --tp 8
    client_cmd: python3 -m sglang.bench_serving --random-range-ratio 1 --random-input-len 8192 --random-output-len 1024 --max-concurrency 32 --num-prompts 96 --output-file llama_405b_results.jsonl
|
||||||
@@ -317,6 +317,11 @@ def format_results(results: List[TaskResult]) -> str:
|
|||||||
return "\n".join(output)
|
return "\n".join(output)
|
||||||
|
|
||||||
|
|
||||||
|
def get_bool_env_var(name: str, default: str = "false") -> bool:
    """Interpret the environment variable *name* as a boolean flag.

    Falls back to *default* when the variable is unset.  The result is
    True only for the case-insensitive strings "true" and "1"; any other
    value (including the default "false") yields False.
    """
    raw = os.getenv(name, default).lower()
    return raw in ("true", "1")
|
||||||
|
|
||||||
|
|
||||||
def write_in_github_step_summary(results: List[TaskResult]):
|
def write_in_github_step_summary(results: List[TaskResult]):
|
||||||
"""Write formatted results to GitHub step summary."""
|
"""Write formatted results to GitHub step summary."""
|
||||||
if not os.environ.get("GITHUB_STEP_SUMMARY"):
|
if not os.environ.get("GITHUB_STEP_SUMMARY"):
|
||||||
@@ -349,7 +354,8 @@ def main():
|
|||||||
result = runner.run_task(config)
|
result = runner.run_task(config)
|
||||||
results.append(result)
|
results.append(result)
|
||||||
|
|
||||||
write_in_github_step_summary(results)
|
if get_bool_env_var("SGLANG_IS_IN_CI"):
|
||||||
|
write_in_github_step_summary(results)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error: {e}")
|
logger.error(f"Error: {e}")
|
||||||
raise
|
raise
|
||||||
|
|||||||
46
test/srt/parse_results.py
Normal file
46
test/srt/parse_results.py
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
"""Parse a JSONL benchmark output file and summarize it.

Reads one JSON object per line (as written by sglang.bench_serving),
extracts a fixed set of latency/throughput metrics, derives per-user
throughput, saves the summary as <input-basename>_summary.csv, and
prints an ASCII table of the results.
"""

import argparse
import json
import os

import pandas as pd
from tabulate import tabulate


def main() -> None:
    """Entry point: parse CLI args, summarize the JSONL file, emit CSV + table."""
    # Parse command-line arguments.
    parser = argparse.ArgumentParser(description="Parse JSONL benchmark and summarize.")
    parser.add_argument("input_file", type=str, help="Path to input JSONL file")
    args = parser.parse_args()

    input_file = args.input_file
    base_name = os.path.splitext(os.path.basename(input_file))[0]
    output_file = f"{base_name}_summary.csv"

    # Metrics copied verbatim from each benchmark record (missing keys -> None).
    fields = [
        "max_concurrency",
        "output_throughput",
        "mean_ttft_ms",
        "median_ttft_ms",
        "p99_ttft_ms",
        "mean_tpot_ms",
        "median_tpot_ms",
        "p99_tpot_ms",
    ]

    # Read JSONL and parse one record per non-empty line.
    results = []
    with open(input_file, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank/trailing lines instead of crashing in json.loads.
                continue
            data = json.loads(line)
            row = {field: data.get(field, None) for field in fields}
            max_conc = data.get("max_concurrency")
            out_tp = data.get("output_throughput")
            # Per-user throughput is undefined when either value is missing
            # or concurrency is zero; guard out_tp too so a missing
            # output_throughput doesn't raise TypeError on division.
            row["per_user_throughput"] = (
                out_tp / max_conc if out_tp is not None and max_conc else None
            )
            results.append(row)

    # Convert to DataFrame and save to CSV.
    df = pd.DataFrame(results)
    df.to_csv(output_file, index=False)
    print(f"\nSaved summary to: {output_file}\n")

    # Print ASCII table.
    print(tabulate(df, headers="keys", tablefmt="grid", floatfmt=".3f"))


if __name__ == "__main__":
    main()
|
||||||
Reference in New Issue
Block a user