# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Generate plots for benchmark results."""

from pathlib import Path
from typing import Any

from vllm.utils.import_utils import PlaceholderModule

try:
    import plotly.express as px
    import plotly.io as pio
except ImportError:
    _plotly = PlaceholderModule("plotly")
    px = _plotly.placeholder_attr("express")
    pio = _plotly.placeholder_attr("io")

try:
    import matplotlib.pyplot as plt
except ImportError:
    _matplotlib = PlaceholderModule("matplotlib")
    plt = _matplotlib.placeholder_attr("pyplot")


def generate_timeline_plot(
    results: list[dict[str, Any]],
    output_path: Path,
    colors: list[str] | None = None,
    itl_thresholds: list[float] | None = None,
    labels: list[str] | None = None,
) -> None:
    """
    Generate an HTML timeline plot from benchmark results.

    Args:
        results: List of per-request result dictionaries containing:
            - start_time: Request start time (seconds)
            - ttft: Time to first token (seconds)
            - itl: List of inter-token latencies (seconds)
            - latency: Total request latency (seconds)
            - prompt_len: Number of prompt tokens
            - output_tokens: Number of output tokens
        output_path: Path where the HTML file will be saved
        colors: List of colors for ITL categories (default: green, orange, red)
        itl_thresholds: ITL thresholds in seconds (default: [0.025, 0.050])
        labels: Labels for ITL categories (default based on thresholds)
    """
    # Set defaults
    if colors is None:
        colors = ["#109618", "#FF7F0E", "#D62728"]
    if itl_thresholds is None:
        itl_thresholds = [0.025, 0.050]
    if labels is None:
        labels = [
            f"ITL < {itl_thresholds[0] * 1000:.0f}ms",
            f"{itl_thresholds[0] * 1000:.0f}ms ≤ ITL < {itl_thresholds[1] * 1000:.0f}ms",  # noqa
            f"ITL ≥ {itl_thresholds[1] * 1000:.0f}ms",
        ]

    labels_colors = {"TTFT": "#636EFA", **dict(zip(labels, colors))}
    labels_order = ["TTFT"] + labels

    timeline_data = construct_timeline_data(results, itl_thresholds, labels)
    if not timeline_data:
        print("No timeline data to plot")
        return

    # Create the plot
    fig = px.timeline(
        timeline_data,
        x_start="start",
        x_end="end",
        y="request_id",
        color="type",
        color_discrete_map=labels_colors,
        category_orders={"type": labels_order},
        hover_data=[
            "prompt_tokens",
            "output_tokens",
            "req_start_time",
            "req_finish_time",
            "segment_start",
            "segment_end",
            "duration",
        ],
    )

    # Customize hover template to show only time without date
    fig.update_traces(
        hovertemplate="%{y}<br>"
" "Type: %{fullData.name}
" "Start: %{customdata[4]}
" "End: %{customdata[5]}
" "Duration: %{customdata[6]}
" "Prompt Tokens: %{customdata[0]}
" "Output Tokens: %{customdata[1]}
" "Request Start Time: %{customdata[2]}
" "Request End Time: %{customdata[3]}
" "" ) fig.update_yaxes(autorange="reversed") fig.update_layout( xaxis_title="Time", yaxis_title="Request ID", showlegend=True, ) # Save to HTML pio.write_html(fig, str(output_path)) print(f"Timeline plot saved to: {output_path}") def construct_timeline_data( requests_data: list[dict[str, Any]], itl_thresholds: list[float], labels: list[str], ) -> list[dict[str, Any]]: """ Construct timeline data from request results. Args: requests_data: List of per-request result dictionaries itl_thresholds: ITL thresholds in seconds labels: Labels for ITL categories Returns: List of timeline segments for plotting """ def tostr(sec_time: float) -> str: """Convert seconds to HH:MM:SS.mmm format.""" h = int(sec_time // 3600) assert h < 100, "time seems to last more than 100 hours" m = int((sec_time % 3600) // 60) s = sec_time % 60 return f"{h:02d}:{m:02d}:{s:06.3f}" def itl_type(itl: float) -> str: """Categorize ITL based on thresholds.""" if itl < itl_thresholds[0]: return labels[0] elif itl < itl_thresholds[1]: return labels[1] else: return labels[2] # Find the earliest start time to use as t0 t0 = None for request in requests_data: start_time = request.get("start_time") if start_time is not None and (t0 is None or start_time < t0): t0 = start_time if t0 is None: return [] timeline_data = [] for i, request in enumerate(requests_data): start_time = request.get("start_time") ttft = request.get("ttft") itl = request.get("itl", []) latency = request.get("latency") prompt_len = request.get("prompt_len", 0) output_tokens = request.get("output_tokens", 0) # Skip requests without required data if start_time is None or ttft is None or latency is None: continue # Normalize start time start_time = start_time - t0 start_time_str = tostr(start_time) # TTFT segment ttft_end = start_time + ttft ttft_end_str = tostr(ttft_end) timeline_data.append( { "request_id": f"Req {i}", "start": start_time_str, "end": ttft_end_str, "type": "TTFT", "prompt_tokens": prompt_len, "output_tokens": output_tokens, "req_start_time": tostr(start_time), "req_finish_time": tostr(start_time + latency), "segment_start": start_time_str, "segment_end": ttft_end_str, "duration": f"{ttft:.3f}s", } ) # ITL segments prev_time = ttft_end prev_time_str = ttft_end_str for itl_value in itl: itl_end = prev_time + itl_value itl_end_str = tostr(itl_end) timeline_data.append( { "request_id": f"Req {i}", "start": prev_time_str, "end": itl_end_str, "type": itl_type(itl_value), "prompt_tokens": prompt_len, "output_tokens": output_tokens, "req_start_time": tostr(start_time), "req_finish_time": tostr(start_time + latency), "segment_start": prev_time_str, "segment_end": itl_end_str, "duration": f"{itl_value:.3f}s", } ) prev_time = itl_end prev_time_str = itl_end_str return timeline_data def generate_dataset_stats_plot( results: list[dict[str, Any]], output_path: Path, ) -> None: """ Generate a matplotlib figure with dataset statistics. 
    Creates a figure with 4 subplots:
    - Top-left: Prompt tokens distribution (histogram)
    - Top-right: Output tokens distribution (histogram)
    - Bottom-left: Prompt+output tokens distribution (histogram)
    - Bottom-right: Stacked bar chart (request_id vs tokens)

    Args:
        results: List of per-request result dictionaries containing:
            - prompt_len: Number of prompt tokens
            - output_tokens: Number of output tokens
        output_path: Path where the figure will be saved
    """
    # Extract data
    prompt_tokens = []
    output_tokens = []
    total_tokens = []
    for request in results:
        prompt_len = request.get("prompt_len", 0)
        output_len = request.get("output_tokens", 0)
        prompt_tokens.append(prompt_len)
        output_tokens.append(output_len)
        total_tokens.append(prompt_len + output_len)

    if not prompt_tokens:
        print("No data available for dataset statistics plot")
        return

    # Create figure with 4 subplots
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(14, 10))

    # Top-left: Prompt tokens distribution
    ax1.hist(prompt_tokens, bins=30, color="steelblue", edgecolor="black", alpha=0.7)
    ax1.set_xlabel("Prompt Tokens")
    ax1.set_ylabel("Frequency")
    ax1.set_title("Prompt Tokens Distribution")
    ax1.grid(True, alpha=0.3)

    # Top-right: Output tokens distribution
    ax2.hist(output_tokens, bins=30, color="coral", edgecolor="black", alpha=0.7)
    ax2.set_xlabel("Output Tokens")
    ax2.set_ylabel("Frequency")
    ax2.set_title("Output Tokens Distribution")
    ax2.grid(True, alpha=0.3)

    # Bottom-left: Prompt+output tokens distribution
    ax3.hist(
        total_tokens, bins=30, color="mediumseagreen", edgecolor="black", alpha=0.7
    )
    ax3.set_xlabel("Total Tokens (Prompt + Output)")
    ax3.set_ylabel("Frequency")
    ax3.set_title("Total Tokens Distribution")
    ax3.grid(True, alpha=0.3)

    # Bottom-right: Stacked bar chart
    request_ids = list(range(len(prompt_tokens)))
    ax4.bar(
        request_ids, prompt_tokens, label="Prompt Tokens", color="steelblue", alpha=0.7
    )
    ax4.bar(
        request_ids,
        output_tokens,
        bottom=prompt_tokens,
        label="Output Tokens",
        color="coral",
        alpha=0.7,
    )
    ax4.set_xlabel("Request ID")
    ax4.set_ylabel("Tokens")
    ax4.set_title("Tokens per Request (Stacked)")
    ax4.legend()
    ax4.grid(True, alpha=0.3, axis="y")

    # Adjust layout to prevent overlap
    plt.tight_layout()

    # Save figure
    plt.savefig(str(output_path), dpi=150, bbox_inches="tight")
    plt.close(fig)
    print(f"Dataset statistics plot saved to: {output_path}")
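

if __name__ == "__main__":
    # Minimal usage sketch, illustrative only: it is not part of the benchmark
    # pipeline. It builds two synthetic per-request results with the keys the
    # plotting functions above expect (start_time, ttft, itl, latency,
    # prompt_len, output_tokens); the values and output filenames are made up.
    synthetic_results = [
        {
            "start_time": 0.00,
            "ttft": 0.12,
            "itl": [0.02, 0.03, 0.06],  # with ttft, sums to the latency below
            "latency": 0.23,
            "prompt_len": 32,
            "output_tokens": 4,
        },
        {
            "start_time": 0.05,
            "ttft": 0.20,
            "itl": [0.01, 0.04],
            "latency": 0.25,
            "prompt_len": 64,
            "output_tokens": 3,
        },
    ]
    generate_timeline_plot(synthetic_results, Path("timeline.html"))
    generate_dataset_stats_plot(synthetic_results, Path("dataset_stats.png"))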