Add tests to AMD CI for MI35x (#9662)
Co-authored-by: Sai Enduri <saimanas.enduri@amd.com>
This commit is contained in:
65
.github/workflows/pr-test-amd.yml
vendored
65
.github/workflows/pr-test-amd.yml
vendored
@@ -28,6 +28,7 @@ jobs:
|
|||||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
github.event.pull_request.draft == false
|
github.event.pull_request.draft == false
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
||||||
runs-on: ${{matrix.runner}}
|
runs-on: ${{matrix.runner}}
|
||||||
@@ -54,8 +55,9 @@ jobs:
|
|||||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
github.event.pull_request.draft == false
|
github.event.pull_request.draft == false
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
|
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2, linux-mi35x-gpu-2]
|
||||||
runs-on: ${{matrix.runner}}
|
runs-on: ${{matrix.runner}}
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
@@ -70,7 +72,7 @@ jobs:
|
|||||||
run: bash scripts/ci/amd_ci_install_dependency.sh
|
run: bash scripts/ci/amd_ci_install_dependency.sh
|
||||||
|
|
||||||
- name: Evaluate accuracy (TP=2)
|
- name: Evaluate accuracy (TP=2)
|
||||||
timeout-minutes: 30
|
timeout-minutes: 60
|
||||||
run: |
|
run: |
|
||||||
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
|
bash scripts/ci/amd_ci_exec.sh -e SGLANG_USE_AITER=0 python3 test_moe_eval_accuracy_large.py
|
||||||
|
|
||||||
@@ -78,6 +80,7 @@ jobs:
|
|||||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
github.event.pull_request.draft == false
|
github.event.pull_request.draft == false
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
||||||
runs-on: ${{matrix.runner}}
|
runs-on: ${{matrix.runner}}
|
||||||
@@ -102,6 +105,7 @@ jobs:
|
|||||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
github.event.pull_request.draft == false
|
github.event.pull_request.draft == false
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
||||||
runs-on: ${{matrix.runner}}
|
runs-on: ${{matrix.runner}}
|
||||||
@@ -142,6 +146,7 @@ jobs:
|
|||||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
github.event.pull_request.draft == false
|
github.event.pull_request.draft == false
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
runner: [linux-mi300-gpu-1, linux-mi325-gpu-1]
|
||||||
runs-on: ${{matrix.runner}}
|
runs-on: ${{matrix.runner}}
|
||||||
@@ -176,6 +181,7 @@ jobs:
|
|||||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
github.event.pull_request.draft == false
|
github.event.pull_request.draft == false
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
|
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
|
||||||
runs-on: ${{matrix.runner}}
|
runs-on: ${{matrix.runner}}
|
||||||
@@ -242,10 +248,36 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8
|
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd --auto-partition-id ${{ matrix.part }} --auto-partition-size 8
|
||||||
|
|
||||||
|
unit-test-backend-1-gpu-amd-mi35x:
|
||||||
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
|
github.event.pull_request.draft == false
|
||||||
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
|
matrix:
|
||||||
|
runner: [linux-mi35x-gpu-1]
|
||||||
|
runs-on: ${{matrix.runner}}
|
||||||
|
steps:
|
||||||
|
- name: Checkout code
|
||||||
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- name: Start CI container
|
||||||
|
run: bash scripts/ci/amd_ci_start_container.sh
|
||||||
|
env:
|
||||||
|
GITHUB_WORKSPACE: ${{ github.workspace }}
|
||||||
|
|
||||||
|
- name: Install dependencies
|
||||||
|
run: bash scripts/ci/amd_ci_install_dependency.sh
|
||||||
|
|
||||||
|
- name: Run test
|
||||||
|
timeout-minutes: 50
|
||||||
|
run: |
|
||||||
|
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-amd-mi35x
|
||||||
|
|
||||||
unit-test-backend-2-gpu-amd:
|
unit-test-backend-2-gpu-amd:
|
||||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
github.event.pull_request.draft == false
|
github.event.pull_request.draft == false
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
|
runner: [linux-mi300-gpu-2, linux-mi325-gpu-2]
|
||||||
runs-on: ${{matrix.runner}}
|
runs-on: ${{matrix.runner}}
|
||||||
@@ -270,6 +302,7 @@ jobs:
|
|||||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
github.event.pull_request.draft == false
|
github.event.pull_request.draft == false
|
||||||
strategy:
|
strategy:
|
||||||
|
fail-fast: false
|
||||||
matrix:
|
matrix:
|
||||||
runner: [linux-mi300-gpu-8]
|
runner: [linux-mi300-gpu-8]
|
||||||
runs-on: ${{matrix.runner}}
|
runs-on: ${{matrix.runner}}
|
||||||
@@ -290,30 +323,6 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
|
bash scripts/ci/amd_ci_exec.sh python3 run_suite.py --suite per-commit-8-gpu-amd --timeout-per-file 3600
|
||||||
|
|
||||||
unit-test-backend-8-gpu-CAR-amd:
|
|
||||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
|
||||||
github.event.pull_request.draft == false
|
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
runner: [linux-mi300-gpu-8]
|
|
||||||
runs-on: ${{matrix.runner}}
|
|
||||||
steps:
|
|
||||||
- name: Checkout code
|
|
||||||
uses: actions/checkout@v4
|
|
||||||
|
|
||||||
- name: Start CI container
|
|
||||||
run: bash scripts/ci/amd_ci_start_container.sh
|
|
||||||
env:
|
|
||||||
GITHUB_WORKSPACE: ${{ github.workspace }}
|
|
||||||
|
|
||||||
- name: Install dependencies
|
|
||||||
run: bash scripts/ci/amd_ci_install_dependency.sh
|
|
||||||
|
|
||||||
- name: Run CustomAllReduce test
|
|
||||||
timeout-minutes: 20
|
|
||||||
run: |
|
|
||||||
bash scripts/ci/amd_ci_exec.sh -e CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 python3 -m unittest test_custom_allreduce.TestCustomAllReduce
|
|
||||||
|
|
||||||
unit-test-sgl-kernel-amd:
|
unit-test-sgl-kernel-amd:
|
||||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||||
github.event.pull_request.draft == false
|
github.event.pull_request.draft == false
|
||||||
@@ -350,8 +359,8 @@ jobs:
|
|||||||
needs: [
|
needs: [
|
||||||
accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd,
|
accuracy-test-1-gpu-amd, mla-test-1-gpu-amd, bench-test-2-gpu-amd,
|
||||||
accuracy-test-2-gpu-amd, performance-test-1-gpu-part-1-amd, performance-test-1-gpu-part-2-amd,
|
accuracy-test-2-gpu-amd, performance-test-1-gpu-part-1-amd, performance-test-1-gpu-part-2-amd,
|
||||||
unit-test-backend-1-gpu-amd, unit-test-backend-2-gpu-amd, unit-test-backend-8-gpu-amd,
|
unit-test-backend-1-gpu-amd, unit-test-backend-1-gpu-amd-mi35x, unit-test-backend-2-gpu-amd,
|
||||||
unit-test-sgl-kernel-amd
|
unit-test-backend-8-gpu-amd, unit-test-sgl-kernel-amd
|
||||||
]
|
]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
|||||||
@@ -2027,7 +2027,10 @@ class DeepseekV2DecoderLayer(nn.Module):
|
|||||||
quant_format = (
|
quant_format = (
|
||||||
"mxfp4"
|
"mxfp4"
|
||||||
if _is_gfx95_supported
|
if _is_gfx95_supported
|
||||||
and self.self_attn.fused_qkv_a_proj_with_mqa.weight == torch.uint8
|
and getattr(self.self_attn, "fused_qkv_a_proj_with_mqa", None) is not None
|
||||||
|
and getattr(self.self_attn.fused_qkv_a_proj_with_mqa, "weight", None)
|
||||||
|
is not None
|
||||||
|
and self.self_attn.fused_qkv_a_proj_with_mqa.weight.dtype == torch.uint8
|
||||||
else ""
|
else ""
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -2582,7 +2585,11 @@ class DeepseekV2ForCausalLM(nn.Module):
|
|||||||
0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim)
|
0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim)
|
||||||
).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1)
|
).split([self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1)
|
||||||
|
|
||||||
if _use_aiter_gfx95 and self.quant_config.get_name() == "quark":
|
if (
|
||||||
|
_use_aiter_gfx95
|
||||||
|
and self.quant_config is not None
|
||||||
|
and self.quant_config.get_name() == "quark"
|
||||||
|
):
|
||||||
w_kc, self_attn.w_scale_k, w_vc, self_attn.w_scale_v = (
|
w_kc, self_attn.w_scale_k, w_vc, self_attn.w_scale_v = (
|
||||||
quark_post_load_weights(self_attn, w, "mxfp4")
|
quark_post_load_weights(self_attn, w, "mxfp4")
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,6 +1,18 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
|
# Detect GPU family from hostname (e.g., linux-mi35x-gpu-1-xxxxx-runner-zzzzz)
|
||||||
|
HOSTNAME_VALUE=$(hostname)
|
||||||
|
GPU_FAMILY=""
|
||||||
|
|
||||||
|
# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
|
||||||
|
if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
|
||||||
|
GPU_FAMILY="${BASH_REMATCH[1]}"
|
||||||
|
echo "Detected GPU family from hostname: ${GPU_FAMILY}"
|
||||||
|
else
|
||||||
|
echo "Warning: could not parse GPU family from '${HOSTNAME_VALUE}'"
|
||||||
|
fi
|
||||||
|
|
||||||
WORKDIR="/sglang-checkout/test/srt"
|
WORKDIR="/sglang-checkout/test/srt"
|
||||||
declare -A ENV_MAP=(
|
declare -A ENV_MAP=(
|
||||||
[SGLANG_AMD_CI]=1
|
[SGLANG_AMD_CI]=1
|
||||||
@@ -8,6 +20,11 @@ declare -A ENV_MAP=(
|
|||||||
[SGLANG_USE_AITER]=1
|
[SGLANG_USE_AITER]=1
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Conditionally add GPU_ARCHS only for mi35x
|
||||||
|
if [[ "${GPU_FAMILY}" == "mi35x" ]]; then
|
||||||
|
ENV_MAP[GPU_ARCHS]="gfx950"
|
||||||
|
fi
|
||||||
|
|
||||||
# Parse -w/--workdir and -e ENV=VAL
|
# Parse -w/--workdir and -e ENV=VAL
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
|
|||||||
@@ -1,19 +1,44 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
HOSTNAME_VALUE=$(hostname)
|
||||||
|
GPU_ARCH="mi30x" # default
|
||||||
|
|
||||||
|
# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
|
||||||
|
if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
|
||||||
|
GPU_ARCH="${BASH_REMATCH[1]}"
|
||||||
|
echo "Detected GPU architecture from hostname: ${GPU_ARCH}"
|
||||||
|
else
|
||||||
|
echo "Warning: could not parse GPU architecture from '${HOSTNAME_VALUE}', defaulting to ${GPU_ARCH}"
|
||||||
|
fi
|
||||||
|
|
||||||
# Install the required dependencies in CI.
|
# Install the required dependencies in CI.
|
||||||
docker exec ci_sglang pip install --upgrade pip
|
docker exec ci_sglang pip install --upgrade pip
|
||||||
docker exec ci_sglang pip uninstall sgl-kernel -y || true
|
docker exec ci_sglang pip uninstall sgl-kernel -y || true
|
||||||
docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
|
docker exec -w /sglang-checkout/sgl-kernel ci_sglang bash -c "rm -f pyproject.toml && mv pyproject_rocm.toml pyproject.toml && python3 setup_rocm.py install"
|
||||||
docker exec ci_sglang pip install -e "python[dev_hip]"
|
|
||||||
|
case "${GPU_ARCH}" in
|
||||||
|
mi35x)
|
||||||
|
echo "Runner uses ${GPU_ARCH}; will fetch mi35x image."
|
||||||
|
docker exec ci_sglang pip install -e "python[dev_hip]" --no-deps # TODO: only for mi35x
|
||||||
|
# For lmms_evals evaluating MMMU
|
||||||
|
docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
|
||||||
|
docker exec -w /lmms-eval ci_sglang pip install -e . --no-deps # TODO: only for mi35x
|
||||||
|
;;
|
||||||
|
mi30x|mi300|mi325)
|
||||||
|
echo "Runner uses ${GPU_ARCH}; will fetch mi30x image."
|
||||||
|
docker exec ci_sglang pip install -e "python[dev_hip]"
|
||||||
|
# For lmms_evals evaluating MMMU
|
||||||
|
docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
|
||||||
|
docker exec -w /lmms-eval ci_sglang pip install -e .
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Runner architecture '${GPU_ARCH}' unrecognised;" >&2
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
|
docker exec -w / ci_sglang git clone https://github.com/merrymercy/human-eval.git
|
||||||
docker exec -w /human-eval ci_sglang pip install -e .
|
docker exec -w /human-eval ci_sglang pip install -e .
|
||||||
|
|
||||||
# For lmms_evals evaluating MMMU
|
|
||||||
docker exec -w / ci_sglang git clone --branch v0.3.3 --depth 1 https://github.com/EvolvingLMMs-Lab/lmms-eval.git
|
|
||||||
docker exec -w /lmms-eval ci_sglang pip install -e .
|
|
||||||
|
|
||||||
docker exec -w / ci_sglang mkdir -p /dummy-grok
|
docker exec -w / ci_sglang mkdir -p /dummy-grok
|
||||||
mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
|
mkdir -p dummy-grok && wget https://sharkpublic.blob.core.windows.net/sharkpublic/sglang/dummy_grok.json -O dummy-grok/config.json
|
||||||
docker cp ./dummy-grok ci_sglang:/
|
docker cp ./dummy-grok ci_sglang:/
|
||||||
|
|||||||
@@ -25,130 +25,102 @@ else
|
|||||||
echo "Warning: version.py not found, using default version: $SGLANG_VERSION" >&2
|
echo "Warning: version.py not found, using default version: $SGLANG_VERSION" >&2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
# Default base tags (can be overridden by command line arguments)
|
# Default base tags (can be overridden by command line arguments)
|
||||||
DEFAULT_MI30X_BASE_TAG="${SGLANG_VERSION}-rocm630-mi30x"
|
DEFAULT_MI30X_BASE_TAG="${SGLANG_VERSION}-rocm630-mi30x"
|
||||||
DEFAULT_MI35X_BASE_TAG="${SGLANG_VERSION}-rocm700-mi35x"
|
DEFAULT_MI35X_BASE_TAG="${SGLANG_VERSION}-rocm700-mi35x"
|
||||||
|
|
||||||
# Parse command line arguments
|
# Parse command line arguments
|
||||||
MI30X_BASE_TAG="$DEFAULT_MI30X_BASE_TAG"
|
MI30X_BASE_TAG="${DEFAULT_MI30X_BASE_TAG}"
|
||||||
MI35X_BASE_TAG="$DEFAULT_MI35X_BASE_TAG"
|
MI35X_BASE_TAG="${DEFAULT_MI35X_BASE_TAG}"
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case $1 in
|
case $1 in
|
||||||
--mi30x-base-tag)
|
--mi30x-base-tag) MI30X_BASE_TAG="$2"; shift 2;;
|
||||||
MI30X_BASE_TAG="$2"
|
--mi35x-base-tag) MI35X_BASE_TAG="$2"; shift 2;;
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
--mi35x-base-tag)
|
|
||||||
MI35X_BASE_TAG="$2"
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
-h|--help)
|
-h|--help)
|
||||||
echo "Usage: $0 [--mi30x-base-tag TAG] [--mi35x-base-tag TAG]"
|
echo "Usage: $0 [--mi30x-base-tag TAG] [--mi35x-base-tag TAG]"
|
||||||
echo " --mi30x-base-tag TAG Base tag for mi30x images (default: $DEFAULT_MI30X_BASE_TAG)"
|
|
||||||
echo " --mi35x-base-tag TAG Base tag for mi35x images (default: $DEFAULT_MI35X_BASE_TAG)"
|
|
||||||
exit 0
|
exit 0
|
||||||
;;
|
;;
|
||||||
*)
|
*) echo "Unknown option $1"; exit 1;;
|
||||||
echo "Unknown option $1"
|
|
||||||
echo "Use --help for usage information"
|
|
||||||
exit 1
|
|
||||||
;;
|
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Detect GPU architecture from the Kubernetes runner hostname
|
||||||
|
HOSTNAME_VALUE=$(hostname)
|
||||||
|
GPU_ARCH="mi30x" # default
|
||||||
|
|
||||||
|
# Host names look like: linux-mi35x-gpu-1-xxxxx-runner-zzzzz
|
||||||
|
if [[ "${HOSTNAME_VALUE}" =~ ^linux-(mi[0-9]+[a-z]*)-gpu-[0-9]+ ]]; then
|
||||||
|
GPU_ARCH="${BASH_REMATCH[1]}"
|
||||||
|
echo "Detected GPU architecture from hostname: ${GPU_ARCH}"
|
||||||
|
else
|
||||||
|
echo "Warning: could not parse GPU architecture from '${HOSTNAME_VALUE}', defaulting to ${GPU_ARCH}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Normalise / collapse architectures we don’t yet build specifically for
|
||||||
|
case "${GPU_ARCH}" in
|
||||||
|
mi35x)
|
||||||
|
echo "Runner uses ${GPU_ARCH}; will fetch mi35x image."
|
||||||
|
;;
|
||||||
|
mi30x|mi300|mi325)
|
||||||
|
echo "Runner uses ${GPU_ARCH}; will fetch mi30x image."
|
||||||
|
GPU_ARCH="mi30x"
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
echo "Runner architecture '${GPU_ARCH}' unrecognised; defaulting to mi30x image." >&2
|
||||||
|
GPU_ARCH="mi30x"
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
|
||||||
# Set up DEVICE_FLAG based on Kubernetes pod info
|
# Set up DEVICE_FLAG based on Kubernetes pod info
|
||||||
if [ -f "/etc/podinfo/gha-render-devices" ]; then
|
if [[ -f /etc/podinfo/gha-render-devices ]]; then
|
||||||
DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
|
DEVICE_FLAG=$(cat /etc/podinfo/gha-render-devices)
|
||||||
else
|
else
|
||||||
DEVICE_FLAG="--device /dev/dri"
|
DEVICE_FLAG="--device /dev/dri"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
# Find the latest image
|
||||||
# Function to find latest available image for a given GPU architecture
|
|
||||||
find_latest_image() {
|
find_latest_image() {
|
||||||
local gpu_arch=$1
|
local gpu_arch=$1
|
||||||
local base_tag
|
local base_tag days_back image_tag
|
||||||
|
|
||||||
if [ "$gpu_arch" == "mi30x" ]; then
|
case "${gpu_arch}" in
|
||||||
base_tag="$MI30X_BASE_TAG"
|
mi30x) base_tag="${MI30X_BASE_TAG}" ;;
|
||||||
elif [ "$gpu_arch" == "mi35x" ]; then
|
mi35x) base_tag="${MI35X_BASE_TAG}" ;;
|
||||||
base_tag="$MI35X_BASE_TAG"
|
*) echo "Error: unsupported GPU architecture '${gpu_arch}'" >&2; return 1 ;;
|
||||||
else
|
esac
|
||||||
echo "Error: Unsupported GPU architecture '$gpu_arch'" >&2
|
|
||||||
return 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
local days_back=0
|
|
||||||
|
|
||||||
while [ $days_back -lt 7 ]; do
|
|
||||||
local check_date=$(date -d "$days_back days ago" +%Y%m%d)
|
|
||||||
local image_tag="${base_tag}-${check_date}"
|
|
||||||
|
|
||||||
|
for days_back in {0..6}; do
|
||||||
|
image_tag="${base_tag}-$(date -d "${days_back} days ago" +%Y%m%d)"
|
||||||
echo "Checking for image: rocm/sgl-dev:${image_tag}" >&2
|
echo "Checking for image: rocm/sgl-dev:${image_tag}" >&2
|
||||||
|
|
||||||
# Check if the image exists by trying to get its manifest
|
|
||||||
if docker manifest inspect "rocm/sgl-dev:${image_tag}" >/dev/null 2>&1; then
|
if docker manifest inspect "rocm/sgl-dev:${image_tag}" >/dev/null 2>&1; then
|
||||||
echo "Found available image: rocm/sgl-dev:${image_tag}" >&2
|
echo "Found available image: rocm/sgl-dev:${image_tag}" >&2
|
||||||
echo "rocm/sgl-dev:${image_tag}"
|
echo "rocm/sgl-dev:${image_tag}"
|
||||||
return 0
|
return 0
|
||||||
fi
|
fi
|
||||||
|
|
||||||
days_back=$((days_back + 1))
|
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "Error: No ${gpu_arch} image found in the last 7 days for version ${base_tag}" >&2
|
echo "Error: no ${gpu_arch} image found in the last 7 days for base ${base_tag}" >&2
|
||||||
|
echo "Using hard-coded fallback…" >&2
|
||||||
# Final fallback to specific hardcoded images
|
if [[ "${gpu_arch}" == "mi35x" ]]; then
|
||||||
echo "Using final fallback images..." >&2
|
|
||||||
if [ "$gpu_arch" == "mi30x" ]; then
|
|
||||||
echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812"
|
|
||||||
elif [ "$gpu_arch" == "mi35x" ]; then
|
|
||||||
echo "rocm/sgl-dev:v0.5.0rc0-rocm700-mi35x-20250812"
|
echo "rocm/sgl-dev:v0.5.0rc0-rocm700-mi35x-20250812"
|
||||||
else
|
else
|
||||||
echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812" # Default to mi30x
|
echo "rocm/sgl-dev:v0.5.0rc0-rocm630-mi30x-20250812"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
return 0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Determine image finder and fallback based on runner
|
# Pull and run the latest image
|
||||||
# In Kubernetes, the hostname contains the GPU type (e.g., linux-mi300-gpu-1-bgg8r-runner-vknlb)
|
|
||||||
# Extract the GPU type from hostname
|
|
||||||
HOSTNAME_VALUE=$(hostname)
|
|
||||||
RUNNER_NAME="unknown"
|
|
||||||
|
|
||||||
if [[ "${HOSTNAME_VALUE}" =~ ^(linux-mi[0-9]+-gpu-[0-9]+) ]]; then
|
|
||||||
RUNNER_NAME="${BASH_REMATCH[1]}"
|
|
||||||
echo "Extracted runner from hostname: ${RUNNER_NAME}"
|
|
||||||
else
|
|
||||||
echo "Could not extract runner info from hostname: ${HOSTNAME_VALUE}"
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "The runner is: ${RUNNER_NAME}"
|
|
||||||
GPU_ARCH="mi30x"
|
|
||||||
|
|
||||||
# Check for mi350/mi355 runners
|
|
||||||
if [[ "${RUNNER_NAME}" =~ ^linux-mi350-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi355-gpu-[0-9]+$ ]]; then
|
|
||||||
echo "Runner is ${RUNNER_NAME}, will find mi35x image."
|
|
||||||
GPU_ARCH="mi35x"
|
|
||||||
# Check for mi300/mi325 runners
|
|
||||||
elif [[ "${RUNNER_NAME}" =~ ^linux-mi300-gpu-[0-9]+$ ]] || [[ "${RUNNER_NAME}" =~ ^linux-mi325-gpu-[0-9]+$ ]]; then
|
|
||||||
echo "Runner is ${RUNNER_NAME}, will find mi30x image."
|
|
||||||
else
|
|
||||||
echo "Runner type not recognized: '${RUNNER_NAME}'"
|
|
||||||
echo "Defaulting to find mi30x image"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Find and pull the latest image
|
|
||||||
IMAGE=$(find_latest_image "${GPU_ARCH}")
|
IMAGE=$(find_latest_image "${GPU_ARCH}")
|
||||||
echo "Pulling Docker image: $IMAGE"
|
echo "Pulling Docker image: ${IMAGE}"
|
||||||
docker pull "$IMAGE"
|
docker pull "${IMAGE}"
|
||||||
|
|
||||||
# Run the container
|
echo "Launching container: ci_sglang"
|
||||||
echo "Starting container: ci_sglang"
|
docker run -dt --user root --device=/dev/kfd ${DEVICE_FLAG} \
|
||||||
docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
|
|
||||||
-v "${GITHUB_WORKSPACE:-$PWD}:/sglang-checkout" \
|
-v "${GITHUB_WORKSPACE:-$PWD}:/sglang-checkout" \
|
||||||
--ipc=host --group-add video \
|
--ipc=host --group-add video \
|
||||||
--shm-size 32g \
|
--shm-size 32g \
|
||||||
@@ -157,4 +129,4 @@ docker run -dt --user root --device=/dev/kfd $DEVICE_FLAG \
|
|||||||
--security-opt seccomp=unconfined \
|
--security-opt seccomp=unconfined \
|
||||||
-w /sglang-checkout \
|
-w /sglang-checkout \
|
||||||
--name ci_sglang \
|
--name ci_sglang \
|
||||||
"$IMAGE"
|
"${IMAGE}"
|
||||||
|
|||||||
@@ -243,6 +243,10 @@ suite_amd = {
|
|||||||
TestFile("test_wave_attention_kernels.py", 2),
|
TestFile("test_wave_attention_kernels.py", 2),
|
||||||
TestFile("test_wave_attention_backend.py", 150),
|
TestFile("test_wave_attention_backend.py", 150),
|
||||||
],
|
],
|
||||||
|
"per-commit-amd-mi35x": [
|
||||||
|
TestFile("test_mla.py", 242),
|
||||||
|
TestFile("test_gpt_oss_1gpu.py", 600),
|
||||||
|
],
|
||||||
"per-commit-2-gpu-amd": [
|
"per-commit-2-gpu-amd": [
|
||||||
TestFile("lora/test_lora_tp.py", 116),
|
TestFile("lora/test_lora_tp.py", 116),
|
||||||
TestFile("rl/test_update_weights_from_distributed.py", 103),
|
TestFile("rl/test_update_weights_from_distributed.py", 103),
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
|
import os
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
from types import SimpleNamespace
|
from types import SimpleNamespace
|
||||||
from typing import Dict, List, Literal, Optional
|
from typing import Dict, List, Literal, Optional
|
||||||
|
|
||||||
from sglang.srt.utils import kill_process_tree
|
from sglang.srt.utils import is_hip, kill_process_tree
|
||||||
from sglang.test.run_eval import run_eval
|
from sglang.test.run_eval import run_eval
|
||||||
from sglang.test.test_utils import (
|
from sglang.test.test_utils import (
|
||||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||||
@@ -14,6 +15,7 @@ from sglang.test.test_utils import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
_base_url = DEFAULT_URL_FOR_TEST
|
_base_url = DEFAULT_URL_FOR_TEST
|
||||||
|
_is_hip = is_hip()
|
||||||
|
|
||||||
|
|
||||||
class BaseTestGptOss(CustomTestCase):
|
class BaseTestGptOss(CustomTestCase):
|
||||||
@@ -36,7 +38,8 @@ class BaseTestGptOss(CustomTestCase):
|
|||||||
|
|
||||||
if model_variant == "20b":
|
if model_variant == "20b":
|
||||||
other_args += ["--cuda-graph-max-bs", "600"]
|
other_args += ["--cuda-graph-max-bs", "600"]
|
||||||
|
if _is_hip:
|
||||||
|
os.environ["SGLANG_USE_AITER"] = "0"
|
||||||
self._run_test_raw(
|
self._run_test_raw(
|
||||||
model=model,
|
model=model,
|
||||||
expected_score_of_reasoning_effort=expected_score_of_reasoning_effort,
|
expected_score_of_reasoning_effort=expected_score_of_reasoning_effort,
|
||||||
|
|||||||
Reference in New Issue
Block a user