Fix potential memory fault issue and ncclSystemError in CI test (#8681)

Co-authored-by: wunhuang <wunhuang@amd.com>
This commit is contained in:
kk
2025-08-06 03:19:37 +08:00
committed by GitHub
parent 4f4e0e4162
commit 32d9e39a29
3 changed files with 7 additions and 9 deletions

View File

@@ -291,7 +291,7 @@ jobs:
bash scripts/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
- name: Run CustomAllReduce test
-timeout-minutes: 10
+timeout-minutes: 20
run: |
bash scripts/amd_ci_exec.sh -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m unittest test_custom_allreduce.TestCustomAllReduce