[ci] recover 8-gpu deepep test (#8105)
This commit is contained in:
42
.github/workflows/pr-test.yml
vendored
42
.github/workflows/pr-test.yml
vendored
@@ -324,33 +324,33 @@ jobs:
|
||||
cd test/srt
|
||||
python3 run_suite.py --suite per-commit-4-gpu-deepep
|
||||
|
||||
# unit-test-deepep-8-gpu:
|
||||
# if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
# github.event.pull_request.draft == false
|
||||
# runs-on: 8-gpu-runner
|
||||
# needs: [
|
||||
# unit-test-deepep-4-gpu,
|
||||
# ]
|
||||
# steps:
|
||||
# - name: Checkout code
|
||||
# uses: actions/checkout@v4
|
||||
#
|
||||
# - name: Install dependencies
|
||||
# run: |
|
||||
# bash scripts/ci_install_deepep.sh
|
||||
#
|
||||
# - name: Run test
|
||||
# timeout-minutes: 20
|
||||
# run: |
|
||||
# cd test/srt
|
||||
# python3 run_suite.py --suite per-commit-8-gpu-deepep
|
||||
unit-test-deepep-8-gpu:
|
||||
if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
|
||||
github.event.pull_request.draft == false
|
||||
runs-on: 8-gpu-runner
|
||||
needs: [
|
||||
unit-test-deepep-4-gpu,
|
||||
]
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
bash scripts/ci_install_deepep.sh
|
||||
|
||||
- name: Run test
|
||||
timeout-minutes: 20
|
||||
run: |
|
||||
cd test/srt
|
||||
python3 run_suite.py --suite per-commit-8-gpu-deepep
|
||||
|
||||
finish:
|
||||
if: always()
|
||||
needs: [
|
||||
unit-test-frontend, unit-test-backend-1-gpu, unit-test-backend-2-gpu, unit-test-backend-4-gpu,
|
||||
unit-test-backend-8-gpu, performance-test-1-gpu-part-1, performance-test-1-gpu-part-2, performance-test-2-gpu,
|
||||
accuracy-test-1-gpu, accuracy-test-2-gpu, unit-test-deepep-4-gpu, # unit-test-deepep-8-gpu,
|
||||
accuracy-test-1-gpu, accuracy-test-2-gpu, unit-test-deepep-4-gpu, unit-test-deepep-8-gpu,
|
||||
]
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
@@ -4,30 +4,30 @@ set -euxo pipefail
|
||||
|
||||
bash scripts/ci_install_dependency.sh
|
||||
|
||||
if python3 -c "import deep_ep" >/dev/null 2>&1; then
|
||||
echo "deep_ep is already installed or importable. Skipping installation."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
export GDRCOPY_HOME=/usr/src/gdrdrv-2.4.4/
|
||||
export NVSHMEM_DIR=/opt/nvshmem/install
|
||||
export LD_LIBRARY_PATH="${NVSHMEM_DIR}/lib:$LD_LIBRARY_PATH"
|
||||
export PATH="${NVSHMEM_DIR}/bin:$PATH"
|
||||
export CUDA_HOME=/usr/local/cuda
|
||||
|
||||
if python3 -c "import deep_ep" >/dev/null 2>&1; then
|
||||
echo "deep_ep is already installed or importable. Skipping installation."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Install system dependencies
|
||||
apt install -y curl wget git sudo libibverbs-dev rdma-core infiniband-diags openssh-server perftest ibverbs-providers libibumad3 libibverbs1 libnl-3-200 libnl-route-3-200 librdmacm1 build-essential cmake
|
||||
|
||||
# Install GDRCopy
|
||||
rm -rf /opt/gdrcopy && mkdir -p /opt/gdrcopy
|
||||
mkdir -p /opt/nvshmem
|
||||
rm -rf /opt/nvshmem && mkdir -p /opt/nvshmem
|
||||
cd /opt/gdrcopy
|
||||
git clone https://github.com/NVIDIA/gdrcopy.git .
|
||||
git checkout v2.4.4
|
||||
apt update
|
||||
apt install -y nvidia-dkms-535
|
||||
apt install -y build-essential devscripts debhelper fakeroot pkg-config dkms
|
||||
apt install -y check libsubunit0 libsubunit-dev
|
||||
apt install -y check libsubunit0 libsubunit-dev python3-venv
|
||||
cd packages
|
||||
CUDA=/usr/local/cuda ./build-deb-packages.sh
|
||||
dpkg -i gdrdrv-dkms_*.deb
|
||||
@@ -40,16 +40,11 @@ if [ ! -e "/usr/lib/x86_64-linux-gnu/libmlx5.so" ]; then
|
||||
fi
|
||||
apt-get update && apt-get install -y libfabric-dev
|
||||
|
||||
# Clone DeepEP
|
||||
rm -rf /root/.cache/deepep && git clone https://github.com/deepseek-ai/DeepEP.git /root/.cache/deepep && cd /root/.cache/deepep && git checkout eef7ab50fa5cf0ab1dd3fce4c6493c90bdf290ac
|
||||
|
||||
# Install NVSHMEM
|
||||
cd /opt/nvshmem
|
||||
wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.2.5/source/nvshmem_src_3.2.5-1.txz
|
||||
tar -xf nvshmem_src_3.2.5-1.txz
|
||||
rm -rf nvshmem && mv nvshmem_src nvshmem
|
||||
cd nvshmem
|
||||
git apply /root/.cache/deepep/third-party/nvshmem.patch
|
||||
wget https://developer.download.nvidia.com/compute/redist/nvshmem/3.3.9/source/nvshmem_src_cuda12-all-all-3.3.9.tar.gz
|
||||
tar -xf nvshmem_src_cuda12-all-all-3.3.9.tar.gz
|
||||
mv nvshmem_src nvshmem && cd nvshmem
|
||||
NVSHMEM_SHMEM_SUPPORT=0 \
|
||||
NVSHMEM_UCX_SUPPORT=0 \
|
||||
NVSHMEM_USE_NCCL=0 \
|
||||
@@ -63,12 +58,10 @@ cd build
|
||||
make -j$(nproc) install
|
||||
|
||||
# Install DeepEP
|
||||
rm -rf /root/.cache/deepep && git clone https://github.com/deepseek-ai/DeepEP.git /root/.cache/deepep && cd /root/.cache/deepep && git checkout b6ce310bb0b75079682d09bc2ebc063a074fbd58
|
||||
cd /root/.cache/deepep && python3 setup.py install
|
||||
|
||||
# Verify configuration
|
||||
echo "=== NCCL Configuration ==="
|
||||
nvidia-smi topo -m
|
||||
nvidia-smi nvlink -s
|
||||
echo "=== Verify GDRCOPY ==="
|
||||
gdrcopy_copybw
|
||||
echo "=== Verify NVSHMEM ==="
|
||||
|
||||
@@ -45,6 +45,7 @@ class TestDeepseek(CustomTestCase):
|
||||
"256",
|
||||
"--max-running-requests",
|
||||
"2048",
|
||||
"--disable-radix-cache",
|
||||
],
|
||||
)
|
||||
|
||||
@@ -54,10 +55,10 @@ class TestDeepseek(CustomTestCase):
|
||||
|
||||
def test_gsm8k(self):
|
||||
args = SimpleNamespace(
|
||||
num_shots=8,
|
||||
num_shots=5,
|
||||
data_path=None,
|
||||
num_questions=1250,
|
||||
parallel=1250,
|
||||
num_questions=1200,
|
||||
parallel=1200,
|
||||
max_new_tokens=512,
|
||||
host="http://127.0.0.1",
|
||||
port=int(self.base_url.split(":")[-1]),
|
||||
@@ -65,7 +66,7 @@ class TestDeepseek(CustomTestCase):
|
||||
metrics = run_eval_few_shot_gsm8k(args)
|
||||
print(f"Eval accuracy of GSM8K: {metrics=}")
|
||||
|
||||
self.assertGreater(metrics["accuracy"], 0.93)
|
||||
self.assertGreater(metrics["accuracy"], 0.92)
|
||||
|
||||
|
||||
class TestDeepseekMTP(CustomTestCase):
|
||||
@@ -107,6 +108,7 @@ class TestDeepseekMTP(CustomTestCase):
|
||||
"1",
|
||||
"--speculative-num-draft-tokens",
|
||||
"2",
|
||||
"--disable-radix-cache",
|
||||
],
|
||||
)
|
||||
|
||||
@@ -116,10 +118,10 @@ class TestDeepseekMTP(CustomTestCase):
|
||||
|
||||
def test_gsm8k(self):
|
||||
args = SimpleNamespace(
|
||||
num_shots=8,
|
||||
num_shots=5,
|
||||
data_path=None,
|
||||
num_questions=1250,
|
||||
parallel=1250,
|
||||
num_questions=1200,
|
||||
parallel=1200,
|
||||
max_new_tokens=512,
|
||||
host="http://127.0.0.1",
|
||||
port=int(self.base_url.split(":")[-1]),
|
||||
@@ -127,7 +129,7 @@ class TestDeepseekMTP(CustomTestCase):
|
||||
metrics = run_eval_few_shot_gsm8k(args)
|
||||
print(f"Eval accuracy of GSM8K: {metrics=}")
|
||||
|
||||
self.assertGreater(metrics["accuracy"], 0.93)
|
||||
self.assertGreater(metrics["accuracy"], 0.92)
|
||||
|
||||
server_info = requests.get(self.base_url + "/get_server_info")
|
||||
avg_spec_accept_length = server_info.json()["internal_states"][0][
|
||||
@@ -138,7 +140,7 @@ class TestDeepseekMTP(CustomTestCase):
|
||||
f"accuracy={metrics['accuracy']=:.3f}\n"
|
||||
f"{avg_spec_accept_length=:.3f}\n"
|
||||
)
|
||||
self.assertGreater(avg_spec_accept_length, 1.9)
|
||||
self.assertGreater(avg_spec_accept_length, 1.85)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -36,6 +36,8 @@ class TestPureDP(CustomTestCase):
|
||||
"128",
|
||||
"--max-running-requests",
|
||||
"128",
|
||||
"--mem-fraction-static",
|
||||
"0.5",
|
||||
],
|
||||
)
|
||||
|
||||
@@ -56,7 +58,7 @@ class TestPureDP(CustomTestCase):
|
||||
metrics = run_eval_few_shot_gsm8k(args)
|
||||
print(metrics)
|
||||
|
||||
self.assertGreater(metrics["accuracy"], 0.62)
|
||||
self.assertGreater(metrics["accuracy"], 0.60)
|
||||
|
||||
|
||||
class TestHybridDPTP(CustomTestCase):
|
||||
@@ -100,7 +102,7 @@ class TestHybridDPTP(CustomTestCase):
|
||||
metrics = run_eval_few_shot_gsm8k(args)
|
||||
print(metrics)
|
||||
|
||||
self.assertGreater(metrics["accuracy"], 0.62)
|
||||
self.assertGreater(metrics["accuracy"], 0.60)
|
||||
|
||||
|
||||
class TestTP(CustomTestCase):
|
||||
@@ -141,10 +143,10 @@ class TestTP(CustomTestCase):
|
||||
metrics = run_eval_few_shot_gsm8k(args)
|
||||
print(metrics)
|
||||
|
||||
self.assertGreater(metrics["accuracy"], 0.62)
|
||||
self.assertGreater(metrics["accuracy"], 0.60)
|
||||
|
||||
|
||||
# @unittest.skip("covered in test_deepep_large.py")
|
||||
@unittest.skip("covered in test_deepep_large.py")
|
||||
class TestNoGatherdBuffer(CustomTestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
@@ -189,7 +191,7 @@ class TestNoGatherdBuffer(CustomTestCase):
|
||||
metrics = run_eval_few_shot_gsm8k(args)
|
||||
print(metrics)
|
||||
|
||||
self.assertGreater(metrics["accuracy"], 0.62)
|
||||
self.assertGreater(metrics["accuracy"], 0.60)
|
||||
|
||||
|
||||
class TestTBO(CustomTestCase):
|
||||
@@ -236,10 +238,10 @@ class TestTBO(CustomTestCase):
|
||||
metrics = run_eval_few_shot_gsm8k(args)
|
||||
print(metrics)
|
||||
|
||||
self.assertGreater(metrics["accuracy"], 0.62)
|
||||
self.assertGreater(metrics["accuracy"], 0.60)
|
||||
|
||||
|
||||
# @unittest.skip("covered in TestMTPWithTBO")
|
||||
@unittest.skip("covered in TestMTPWithTBO")
|
||||
class TestMTP(CustomTestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
@@ -280,8 +282,6 @@ class TestMTP(CustomTestCase):
|
||||
kill_process_tree(cls.process.pid)
|
||||
|
||||
def test_gsm8k(self):
|
||||
requests.get(self.base_url + "/flush_cache")
|
||||
|
||||
args = SimpleNamespace(
|
||||
num_shots=5,
|
||||
data_path=None,
|
||||
@@ -352,8 +352,6 @@ class TestMTPWithTBO(CustomTestCase):
|
||||
kill_process_tree(cls.process.pid)
|
||||
|
||||
def test_gsm8k(self):
|
||||
requests.get(self.base_url + "/flush_cache")
|
||||
|
||||
args = SimpleNamespace(
|
||||
num_shots=5,
|
||||
data_path=None,
|
||||
|
||||
Reference in New Issue
Block a user