[AMD] Support Hierarchical Caching on AMD GPUs (#8236)

This commit is contained in:
Hubert Lu
2025-08-28 15:27:07 -07:00
committed by GitHub
parent 5343058875
commit 711390a971
10 changed files with 105 additions and 32 deletions

View File

@@ -223,7 +223,7 @@ jobs:
fail-fast: false
matrix:
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
part: [0, 1, 2, 3, 4, 5, 6]
part: [0, 1, 2, 3, 4, 5, 6, 7]
runs-on: ${{matrix.runner}}
steps:
- name: Checkout code
@@ -240,7 +240,7 @@ jobs:
- name: Run test
timeout-minutes: 50
run: |
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 7
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8
unit-test-backend-2-gpu-amd:
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
@@ -336,13 +336,14 @@ jobs:
bash scripts/ci/amd_ci_install_dependency.sh
- name: Run test
timeout-minutes: 10
timeout-minutes: 14
run: |
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_align.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_moe_topk_softmax.py
docker exec -w /sglang-checkout/sgl-kernel/tests/speculative ci_sglang python3 -m pytest test_eagle_utils.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_apply_token_bitmask_inplace.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_activation.py
docker exec -w /sglang-checkout/sgl-kernel/tests ci_sglang python3 -m pytest test_kvcacheio.py
pr-test-amd-finish:
if: always()