[AMD] Support Hierarchical Caching on AMD GPUs (#8236)

2025-08-28 15:27:07 -07:00
parent 5343058875
commit 711390a971
10 changed files with 105 additions and 32 deletions
--- a/.github/workflows/pr-test-amd.yml
+++ b/.github/workflows/pr-test-amd.yml
@@ -223,7 +223,7 @@ jobs:
      fail-fast: false
      matrix:
        runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
-        part: [0, 1, 2, 3, 4, 5, 6]
+        part: [0, 1, 2, 3, 4, 5, 6, 7]
    runs-on: ${{matrix.runner}}
    steps:
      - name: Checkout code
@@ -240,7 +240,7 @@ jobs:
      - name: Run test
        timeout-minutes: 50
        run: |
-          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 7
+          bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8

  unit-test-backend-2-gpu-amd:
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
@@ -336,13 +336,14 @@ jobs:
          bash scripts/ci/amd_ci_install_dependency.sh

      - name: Run test
-        timeout-minutes: 10
+        timeout-minutes: 14
        run: |
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
          docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
+          docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py

  pr-test-amd-finish:
    if: always()