From fff7fbabe63f44e6aa4fe7d3a4a5a3215370ad4f Mon Sep 17 00:00:00 2001
From: Mick
Date: Sat, 27 Sep 2025 02:02:44 +0800
Subject: [PATCH] ci: fix rate-limit of huggingface with hf auth login (#10947)

---
 .github/workflows/pr-test.yml       | 26 ++++++++++++++++++++++++++
 scripts/ci/ci_install_dependency.sh |  6 ++++++
 test/srt/run_suite.py               |  6 +++---
 3 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index 487b0719d..5ae0f89be 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -103,6 +103,8 @@ jobs:
     needs: [check-changes, sgl-kernel-build-wheels]
     if: needs.check-changes.outputs.sgl_kernel == 'true'
     runs-on: 1-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     steps:
       - uses: actions/checkout@v4
 
@@ -191,6 +193,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 1-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     strategy:
       fail-fast: false
       matrix:
@@ -222,6 +226,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 2-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     strategy:
       fail-fast: false
       matrix:
@@ -253,6 +259,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 4-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     strategy:
       fail-fast: false
       matrix:
@@ -284,6 +292,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 8-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     strategy:
       fail-fast: false
       matrix:
@@ -315,6 +325,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 1-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -374,6 +386,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 1-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -425,6 +439,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 2-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -482,6 +498,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 1-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -512,6 +530,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 2-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -542,6 +562,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 4-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -569,6 +591,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 8-gpu-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     steps:
       - name: Checkout code
         uses: actions/checkout@v4
@@ -596,6 +620,8 @@ jobs:
     if: always() && !failure() && !cancelled() &&
       ((needs.check-changes.outputs.main_package == 'true') || (needs.check-changes.outputs.sgl_kernel == 'true'))
     runs-on: 4-b200-runner
+    env:
+      HF_TOKEN: ${{ secrets.HF_TOKEN }}
     strategy:
       fail-fast: false
     steps:
diff --git a/scripts/ci/ci_install_dependency.sh b/scripts/ci/ci_install_dependency.sh
index e4db2f736..e7f9f0c42 100755
--- a/scripts/ci/ci_install_dependency.sh
+++ b/scripts/ci/ci_install_dependency.sh
@@ -91,3 +91,9 @@ fi
 
 # Show current packages
 $PIP_CMD list
+
+# Log in to the Hugging Face Hub only when the workflow supplies HF_TOKEN.
+if [ -n "${HF_TOKEN:-}" ]; then
+    $PIP_CMD install -U "huggingface_hub[cli]" $PIP_INSTALL_SUFFIX
+    hf auth login --token "$HF_TOKEN"
+fi
diff --git a/test/srt/run_suite.py b/test/srt/run_suite.py
index 5dd588649..53720467a 100644
--- a/test/srt/run_suite.py
+++ b/test/srt/run_suite.py
@@ -35,7 +35,7 @@ suites = {
         # TestFile("models/test_grok_models.py", 60),  # Disabled due to illegal memory access
         TestFile("models/test_qwen_models.py", 82),
         TestFile("models/test_reward_models.py", 132),
-        TestFile("models/test_vlm_models.py", 437),
+        TestFile("models/test_vlm_models.py", 741),
         TestFile("models/test_transformers_models.py", 320),
         TestFile("openai_server/basic/test_protocol.py", 10),
         TestFile("openai_server/basic/test_serving_chat.py", 10),
@@ -93,7 +93,7 @@ suites = {
         TestFile("test_multi_tokenizer.py", 230),
         TestFile("test_no_chunked_prefill.py", 108),
         TestFile("test_no_overlap_scheduler.py", 234),
-        TestFile("test_original_logprobs.py", 200),
+        TestFile("test_original_logprobs.py", 41),
         TestFile("test_penalty.py", 41),
         TestFile("test_page_size.py", 60),
         TestFile("test_priority_scheduling.py", 100),
@@ -244,7 +244,7 @@ suite_amd = {
         TestFile("test_skip_tokenizer_init.py", 117),
         TestFile("test_srt_engine.py", 261),
         TestFile("test_srt_endpoint.py", 130),
-        TestFile("test_torch_compile.py", 76),
+        TestFile("test_torch_compile.py", 169),
         TestFile("test_torch_compile_moe.py", 172),
         TestFile("test_torch_native_attention_backend.py", 123),
         TestFile("test_triton_attention_backend.py", 150),