[Fix AMD CI] VRAM cleanup (#11174)

Co-authored-by: root <root@smci350-zts-gtu-e17-15.zts-gtu.dcgpu>
This commit is contained in:
sunxxuns
2025-10-05 19:03:53 -04:00
committed by GitHub
parent c560410da7
commit 5e142484e2
4 changed files with 162 additions and 1 deletions

27
scripts/check_vram_clear.sh Executable file
View File

@@ -0,0 +1,27 @@
#!/bin/bash
check_vram_clear() {
local vram_threshold_percent=5 # Allow up to 5% VRAM usage
local memory_threshold_mb=500 # Allow up to 500MB memory usage
if command -v rocm-smi >/dev/null 2>&1; then
echo "Checking ROCm GPU VRAM usage..."
# Check if any GPU has more than threshold VRAM allocated
local high_usage=$(rocm-smi --showmemuse | grep -E "GPU Memory Allocated \(VRAM%\): ([6-9]|[1-9][0-9]|100)")
if [ -n "$high_usage" ]; then
echo "ERROR: VRAM usage exceeds threshold (${vram_threshold_percent}%) on some GPUs:"
echo "$high_usage"
rocm-smi --showmemuse
return 1
else
echo "✓ VRAM usage is within acceptable limits on all GPUs"
return 0
fi
fi
}
# If this script is run directly (not sourced), run the check
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
set -e
check_vram_clear
fi