From 9cd4ac76a18945a5816c62533536603bb324a65e Mon Sep 17 00:00:00 2001 From: Li Wang Date: Sun, 13 Jul 2025 20:00:35 +0800 Subject: [PATCH] [CI] Remove benchmark patch and increase the scheduler frequency (#1762) ### What this PR does / why we need it? This PR does the following: 1. Remove the `benchmark_datasets.py` patch 2. Increase the scheduler frequency to twice per day: because of the recent large number of daily submissions, we need to increase the default test time (6h) ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? - vLLM version: v0.9.2 - vLLM main: https://github.com/vllm-project/vllm/commit/247102f07f1dda117fef06493292e1925a5fcd31 --------- Signed-off-by: wangli --- .github/workflows/nightly_benchmarks.yaml | 7 +- benchmarks/requirements-bench.txt | 1 - benchmarks/scripts/patch_benchmark_dataset.py | 79 ------------------- .../scripts/run-performance-benchmarks.sh | 1 - 4 files changed, 4 insertions(+), 84 deletions(-) delete mode 100644 benchmarks/scripts/patch_benchmark_dataset.py diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml index 6644e6f..5a57276 100644 --- a/.github/workflows/nightly_benchmarks.yaml +++ b/.github/workflows/nightly_benchmarks.yaml @@ -20,9 +20,10 @@ name: 'Benchmarks / Performance' on: schedule: - # Run at 02:00 everyday - - cron: '00 18 * * *' - + # Run benchmarks at 20:00 and 03:00 Beijing time (UTC+8) + - cron: "0 12 * * *" + - cron: "0 19 * * *" + workflow_dispatch: # Allow manual triggering of the workflow diff --git a/benchmarks/requirements-bench.txt b/benchmarks/requirements-bench.txt index 54c28c8..2290823 100644 --- a/benchmarks/requirements-bench.txt +++ b/benchmarks/requirements-bench.txt @@ -1,5 +1,4 @@ pandas datasets modelscope -libcst tabulate \ No newline at end of file diff --git a/benchmarks/scripts/patch_benchmark_dataset.py b/benchmarks/scripts/patch_benchmark_dataset.py deleted file mode 100644 index 
61e588b..0000000 --- a/benchmarks/scripts/patch_benchmark_dataset.py +++ /dev/null @@ -1,79 +0,0 @@ -import os -from argparse import ArgumentParser - -import libcst as cst -import libcst.matchers as m - -# Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls - - -# TODO(Potabk): Remove this patch when the issue is fixed in the upstream -class StreamingFalseTransformer(cst.CSTTransformer): - def __init__(self): - self.in_target_class = False - self.in_target_func = False - - def visit_ClassDef(self, node): - if node.name.value == "HuggingFaceDataset": - self.in_target_class = True - - def leave_ClassDef(self, original_node, updated_node): - self.in_target_class = False - return updated_node - - def visit_FunctionDef(self, node): - if self.in_target_class and node.name.value == "load_data": - self.in_target_func = True - - def leave_FunctionDef(self, original_node, updated_node): - self.in_target_func = False - return updated_node - - def leave_Call(self, original_node, updated_node): - if self.in_target_class and self.in_target_func: - if m.matches(updated_node.func, m.Name("load_dataset")): - new_args = [] - for arg in updated_node.args: - if arg.keyword and arg.keyword.value == "streaming": - new_arg = arg.with_changes(value=cst.Name("False")) - new_args.append(new_arg) - else: - new_args.append(arg) - return updated_node.with_changes(args=new_args) - return updated_node - - -def patch_file(path): - abs_path = os.path.abspath(path) - - if not os.path.exists(abs_path): - print(f"File not found: {abs_path}") - return - - with open(abs_path, "r", encoding="utf-8") as f: - source = f.read() - - module = cst.parse_module(source) - modified = module.visit(StreamingFalseTransformer()) - - with open(abs_path, "w", encoding="utf-8") as f: - f.write(modified.code) - - print(f"Patched: {abs_path}") - - -if __name__ == "__main__": - parser = ArgumentParser( - description="Patch benchmark_dataset.py to set streaming=False in load_dataset calls" - ) 
- parser.add_argument( - "--path", type=str, help="Path to the benchmark_dataset.py file" - ) - parser.add_argument( - "--path", - type=str, - default="/vllm-workspace/vllm/vllm/benchmarks/datasets.py", - help="Path to the benchmark_dataset.py file", - ) - args = parser.parse_args() - patch_file(args.path) diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh index d4bafc9..b604fe9 100644 --- a/benchmarks/scripts/run-performance-benchmarks.sh +++ b/benchmarks/scripts/run-performance-benchmarks.sh @@ -281,7 +281,6 @@ cleanup_on_error() { main() { START_TIME=$(date +%s) check_npus - python3 benchmarks/scripts/patch_benchmark_dataset.py # dependencies (which wget && which curl) || (apt-get update && apt-get install -y wget curl)