From 9cd4ac76a18945a5816c62533536603bb324a65e Mon Sep 17 00:00:00 2001
From: Li Wang <wangli858794774@gmail.com>
Date: Sun, 13 Jul 2025 20:00:35 +0800
Subject: [PATCH] [CI] Remove benchmark patch and increase the scheduler
 frequency (#1762)

### What this PR does / why we need it?
This PR does the following:
1. Remove `benchmark_datasets.py` patch
2. Increase the scheduler frequency to twice per day. Because of the
recent large number of daily submissions, we need to increase the
default test time (6h).
### Does this PR introduce _any_ user-facing change?

### How was this patch tested?


- vLLM version: v0.9.2
- vLLM main:
https://github.com/vllm-project/vllm/commit/247102f07f1dda117fef06493292e1925a5fcd31

---------

Signed-off-by: wangli <wangli858794774@gmail.com>
---
 .github/workflows/nightly_benchmarks.yaml     |  7 +-
 benchmarks/requirements-bench.txt             |  1 -
 benchmarks/scripts/patch_benchmark_dataset.py | 79 -------------------
 .../scripts/run-performance-benchmarks.sh     |  1 -
 4 files changed, 4 insertions(+), 84 deletions(-)
 delete mode 100644 benchmarks/scripts/patch_benchmark_dataset.py

diff --git a/.github/workflows/nightly_benchmarks.yaml b/.github/workflows/nightly_benchmarks.yaml
index 6644e6f..5a57276 100644
--- a/.github/workflows/nightly_benchmarks.yaml
+++ b/.github/workflows/nightly_benchmarks.yaml
@@ -20,9 +20,10 @@ name: 'Benchmarks / Performance'
 
 on:
   schedule:
-    # Run at 02:00 everyday
-    - cron: '00 18 * * *'
-  
+    # Run benchmarks at 20:00 and 03:00 Beijing time (UTC+8)
+    - cron: "0 12 * * *"
+    - cron: "0 19 * * *"
+
   workflow_dispatch:
     # Allow manual triggering of the workflow
 
diff --git a/benchmarks/requirements-bench.txt b/benchmarks/requirements-bench.txt
index 54c28c8..2290823 100644
--- a/benchmarks/requirements-bench.txt
+++ b/benchmarks/requirements-bench.txt
@@ -1,5 +1,4 @@
 pandas
 datasets
 modelscope
-libcst
 tabulate
\ No newline at end of file
diff --git a/benchmarks/scripts/patch_benchmark_dataset.py b/benchmarks/scripts/patch_benchmark_dataset.py
deleted file mode 100644
index 61e588b..0000000
--- a/benchmarks/scripts/patch_benchmark_dataset.py
+++ /dev/null
@@ -1,79 +0,0 @@
-import os
-from argparse import ArgumentParser
-
-import libcst as cst
-import libcst.matchers as m
-
-# Patch the benchmark_dataset.py file to set streaming=False in load_dataset calls
-
-
-# TODO(Potabk): Remove this patch when the issue is fixed in the upstream
-class StreamingFalseTransformer(cst.CSTTransformer):
-    def __init__(self):
-        self.in_target_class = False
-        self.in_target_func = False
-
-    def visit_ClassDef(self, node):
-        if node.name.value == "HuggingFaceDataset":
-            self.in_target_class = True
-
-    def leave_ClassDef(self, original_node, updated_node):
-        self.in_target_class = False
-        return updated_node
-
-    def visit_FunctionDef(self, node):
-        if self.in_target_class and node.name.value == "load_data":
-            self.in_target_func = True
-
-    def leave_FunctionDef(self, original_node, updated_node):
-        self.in_target_func = False
-        return updated_node
-
-    def leave_Call(self, original_node, updated_node):
-        if self.in_target_class and self.in_target_func:
-            if m.matches(updated_node.func, m.Name("load_dataset")):
-                new_args = []
-                for arg in updated_node.args:
-                    if arg.keyword and arg.keyword.value == "streaming":
-                        new_arg = arg.with_changes(value=cst.Name("False"))
-                        new_args.append(new_arg)
-                    else:
-                        new_args.append(arg)
-                return updated_node.with_changes(args=new_args)
-        return updated_node
-
-
-def patch_file(path):
-    abs_path = os.path.abspath(path)
-
-    if not os.path.exists(abs_path):
-        print(f"File not found: {abs_path}")
-        return
-
-    with open(abs_path, "r", encoding="utf-8") as f:
-        source = f.read()
-
-    module = cst.parse_module(source)
-    modified = module.visit(StreamingFalseTransformer())
-
-    with open(abs_path, "w", encoding="utf-8") as f:
-        f.write(modified.code)
-
-    print(f"Patched: {abs_path}")
-
-
-if __name__ == "__main__":
-    parser = ArgumentParser(
-        description="Patch benchmark_dataset.py to set streaming=False in load_dataset calls"
-    )
-    parser.add_argument(
-        "--path", type=str, help="Path to the benchmark_dataset.py file"
-    )
-    parser.add_argument(
-        "--path",
-        type=str,
-        default="/vllm-workspace/vllm/vllm/benchmarks/datasets.py",
-        help="Path to the benchmark_dataset.py file",
-    )
-    args = parser.parse_args()
-    patch_file(args.path)
diff --git a/benchmarks/scripts/run-performance-benchmarks.sh b/benchmarks/scripts/run-performance-benchmarks.sh
index d4bafc9..b604fe9 100644
--- a/benchmarks/scripts/run-performance-benchmarks.sh
+++ b/benchmarks/scripts/run-performance-benchmarks.sh
@@ -281,7 +281,6 @@ cleanup_on_error() {
 main() {
   START_TIME=$(date +%s)
   check_npus
-  python3 benchmarks/scripts/patch_benchmark_dataset.py
   
   # dependencies
   (which wget && which curl) || (apt-get update && apt-get install -y wget curl)