diff --git a/scripts/ci_monitor/ci_analyzer.py b/scripts/ci_monitor/ci_analyzer.py index 1655c7e53..3dab9a9e8 100755 --- a/scripts/ci_monitor/ci_analyzer.py +++ b/scripts/ci_monitor/ci_analyzer.py @@ -31,9 +31,10 @@ class SGLangCIAnalyzer: self.session = requests.Session() self.session.headers.update(self.headers) - def get_recent_runs(self, limit: int = 100) -> List[Dict]: + def get_recent_runs(self, limit: int = 100, branch: str = None) -> List[Dict]: """Get recent CI run data""" - print(f"Fetching {limit} recent CI runs...") + branch_info = f" from branch '{branch}'" if branch else "" + print(f"Fetching {limit} recent CI runs{branch_info}...") all_runs = [] page = 1 @@ -42,6 +43,8 @@ class SGLangCIAnalyzer: while len(all_runs) < limit: url = f"{self.base_url}/repos/{self.repo}/actions/runs" params = {"per_page": min(per_page, limit - len(all_runs)), "page": page} + if branch: + params["branch"] = branch try: response = self.session.get(url, params=params) @@ -407,6 +410,11 @@ def main(): default="ci_analysis.json", help="Output file (default: ci_analysis.json)", ) + parser.add_argument( + "--branch", + default="main", + help="Filter runs by branch (default: 'main'). Set to empty string '' to analyze all branches.", + ) args = parser.parse_args() @@ -415,7 +423,9 @@ def main(): try: # Get CI run data - runs = analyzer.get_recent_runs(args.limit) + # Use None for branch if empty string is provided (to scan all branches) + branch = args.branch if args.branch else None + runs = analyzer.get_recent_runs(args.limit, branch) if not runs: print("No CI run data found") diff --git a/scripts/ci_monitor/ci_analyzer_perf.py b/scripts/ci_monitor/ci_analyzer_perf.py index 12ff04e55..ba0defebb 100755 --- a/scripts/ci_monitor/ci_analyzer_perf.py +++ b/scripts/ci_monitor/ci_analyzer_perf.py @@ -128,24 +128,29 @@ class SGLangPerfAnalyzer: rcParams["grid.alpha"] = 0.3 def get_recent_runs( - self, limit: int = 100, start_date: str = None, end_date: str = None + self, + limit: int = 100, + start_date: str = None, + end_date: str = None, + branch: str = None, ) -> List[Dict]: """Get recent CI run data with multiple collection strategies""" # If date range is specified, get all data in that range if start_date or end_date: - return self._get_date_range_runs(start_date, end_date) + return self._get_date_range_runs(start_date, end_date, branch) - print(f"Getting PR Test runs (limit: {limit})...") + branch_info = f" from branch '{branch}'" if branch else "" + print(f"Getting PR Test runs{branch_info} (limit: {limit})...") # Use sampling strategy if limit >= 500, otherwise use sequential if limit >= 500: print(f"Using uniform sampling for {limit} runs to cover ~30 days...") - return self._get_sampled_runs(limit) + return self._get_sampled_runs(limit, branch) else: - return self._get_sequential_runs(limit) + return self._get_sequential_runs(limit, branch) - def _get_sequential_runs(self, limit: int) -> List[Dict]: + def _get_sequential_runs(self, limit: int, branch: str = None) -> List[Dict]: """Original sequential method for smaller limits""" print(f"Using sequential sampling for {limit} runs...") @@ -156,6 +161,8 @@ class SGLangPerfAnalyzer: while len(pr_test_runs) < limit: url = f"{self.base_url}/repos/{self.repo}/actions/runs" params = {"per_page": per_page, "page": page} + if branch: + params["branch"] = branch try: response = self.session.get(url, params=params) @@ -192,12 +199,14 @@ class SGLangPerfAnalyzer: return pr_test_runs - def _get_sampled_runs(self, limit: int) -> List[Dict]: + def _get_sampled_runs(self, limit: int, branch: str = None) -> List[Dict]: """Uniform sampling method for 30-day coverage""" from datetime import datetime, timedelta # Uniform sampling across 30 days - sampled_runs = self._sample_time_period(limit, days_back=30, uniform=True) + sampled_runs = self._sample_time_period( + limit, days_back=30, uniform=True, branch=branch + ) print( f"Sampled {len(sampled_runs)} runs from 30-day period (requested: {limit})" @@ -210,6 +219,7 @@ class SGLangPerfAnalyzer: days_back: int, skip_recent_days: int = 0, uniform: bool = False, + branch: str = None, ) -> List[Dict]: """Sample runs from a specific time period""" from datetime import datetime, timedelta @@ -231,6 +241,8 @@ class SGLangPerfAnalyzer: while True: url = f"{self.base_url}/repos/{self.repo}/actions/runs" params = {"per_page": per_page, "page": page} + if branch: + params["branch"] = branch try: response = self.session.get(url, params=params) @@ -358,7 +370,7 @@ class SGLangPerfAnalyzer: return sampled_runs def _get_date_range_runs( - self, start_date: str = None, end_date: str = None + self, start_date: str = None, end_date: str = None, branch: str = None ) -> List[Dict]: """Get all CI runs within specified date range""" from datetime import datetime, timedelta @@ -394,8 +406,9 @@ class SGLangPerfAnalyzer: f"start_date ({start_date}) must be before end_date ({end_date})" ) + branch_info = f" from branch '{branch}'" if branch else "" print( - f"Getting ALL CI runs from {start_time.strftime('%Y-%m-%d')} to {end_time.strftime('%Y-%m-%d')}" + f"Getting ALL CI runs{branch_info} from {start_time.strftime('%Y-%m-%d')} to {end_time.strftime('%Y-%m-%d')}" ) collected_runs = [] @@ -406,6 +419,8 @@ class SGLangPerfAnalyzer: while True: url = f"{self.base_url}/repos/{self.repo}/actions/runs" params = {"per_page": per_page, "page": page} + if branch: + params["branch"] = branch try: response = self.session.get(url, params=params) @@ -1331,6 +1346,11 @@ def main(): type=str, help="End date for date range query (YYYY-MM-DD format). When specified with --start-date, gets ALL runs in range.", ) + parser.add_argument( + "--branch", + default="main", + help="Filter runs by branch (default: 'main'). Set to empty string '' to analyze all branches.", + ) args = parser.parse_args() @@ -1339,7 +1359,11 @@ def main(): try: # Get CI run data - runs = analyzer.get_recent_runs(args.limit, args.start_date, args.end_date) + # Use None for branch if empty string is provided (to scan all branches) + branch = args.branch if args.branch else None + runs = analyzer.get_recent_runs( + args.limit, args.start_date, args.end_date, branch + ) if not runs: print("No CI run data found")