[CI] add deepseek w4a8 test on h20 ci (#7758)
This commit is contained in:
80
.github/workflows/pr-test-h20.yml
vendored
Normal file
80
.github/workflows/pr-test-h20.yml
vendored
Normal file
@@ -0,0 +1,80 @@
|
||||
# CI workflow that runs the `per-commit-8-gpu-h20` test suite on the
# `8-gpu-h20` runner when DeepSeek model code, MoE layers, or this workflow
# file change.
name: PR Test (H20)

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:
    inputs:
      # NOTE(review): `version` is not referenced by any job below — confirm
      # whether it is still needed.
      version:
        required: true
        type: choice
        default: 'release'
        options:
          - 'release'
          - 'nightly'

# One run per ref: a newer run on the same ref cancels the one in progress.
concurrency:
  group: pr-test-h20-${{ github.ref }}
  cancel-in-progress: true

jobs:
  # Decide whether the expensive GPU job needs to run at all.
  check-changes:
    runs-on: ubuntu-latest
    outputs:
      src: ${{ steps.filter.outputs.src }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Detect file changes
        id: filter
        uses: dorny/paths-filter@v3
        with:
          filters: |
            src:
              - "python/sglang/srt/models/deepseek*"
              - "python/sglang/srt/layers/moe/**"
              - ".github/workflows/pr-test-h20.yml"

  per-commit-8-gpu-h20:
    needs: [check-changes]
    # Run on the upstream repository or for any pull request, skip draft PRs,
    # and only when the path filter above matched.
    if: (github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request') &&
        github.event.pull_request.draft == false &&
        needs.check-changes.outputs.src == 'true'
    runs-on: 8-gpu-h20
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          bash scripts/ci/ci_install_dependency.sh

      - name: Run test
        timeout-minutes: 20

        run: |
          cd test/srt
          python3 run_suite.py --suite per-commit-8-gpu-h20

  # Single aggregate status for the workflow.
  pr-test-finish:
    needs: [
      check-changes,
      per-commit-8-gpu-h20,
    ]
    # `always()` is required here: without a status-check function GitHub
    # applies an implicit `success()`, which skips this job whenever a needed
    # job fails or is cancelled — exactly the cases the script below reports.
    if: always() && needs.check-changes.outputs.src == 'true'
    runs-on: ubuntu-latest
    steps:
      - name: Check all dependent job statuses
        run: |
          results=(${{ join(needs.*.result, ' ') }})
          for result in "${results[@]}"; do
            if [ "$result" = "failure" ] || [ "$result" = "cancelled" ]; then
              echo "Job failed with result: $result"
              exit 1
            fi
          done
          echo "All jobs completed successfully"
          exit 0
|
||||
@@ -78,6 +78,7 @@ DEFAULT_AWQ_MOE_MODEL_NAME_FOR_TEST = (
|
||||
"hugging-quants/Mixtral-8x7B-Instruct-v0.1-AWQ-INT4"
|
||||
)
|
||||
DEFAULT_ENABLE_THINKING_MODEL_NAME_FOR_TEST = "Qwen/Qwen3-30B-A3B"
|
||||
DEFAULT_DEEPSEEK_W4AFP8_MODEL_FOR_TEST = "Barrrrry/DeepSeek-R1-W4AFP8"
|
||||
|
||||
# Nightly tests
|
||||
DEFAULT_MODEL_NAME_FOR_NIGHTLY_EVAL_TP1 = "meta-llama/Llama-3.1-8B-Instruct,mistralai/Mistral-7B-Instruct-v0.3,deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct,google/gemma-2-27b-it"
|
||||
|
||||
122
test/srt/quant/test_w4a8_deepseek_v3.py
Normal file
122
test/srt/quant/test_w4a8_deepseek_v3.py
Normal file
@@ -0,0 +1,122 @@
|
||||
import unittest
|
||||
from types import SimpleNamespace
|
||||
|
||||
import requests
|
||||
|
||||
from sglang.srt.utils import kill_process_tree
|
||||
from sglang.test.few_shot_gsm8k import run_eval as run_eval_few_shot_gsm8k
|
||||
from sglang.test.test_utils import (
|
||||
DEFAULT_DEEPSEEK_W4AFP8_MODEL_FOR_TEST,
|
||||
DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
|
||||
DEFAULT_URL_FOR_TEST,
|
||||
CustomTestCase,
|
||||
is_in_amd_ci,
|
||||
is_in_ci,
|
||||
popen_launch_server,
|
||||
try_cached_model,
|
||||
write_github_step_summary,
|
||||
)
|
||||
|
||||
|
||||
class TestDeepseekV3W4afp8(CustomTestCase):
    """GSM8K accuracy test against a server running the DeepSeek W4AFP8 model.

    The server is launched once per class with ``--tp 8 --ep-size 8`` and is
    shared by every test method.
    """

    @classmethod
    def setUpClass(cls):
        """Resolve the model and start the server process used by the tests."""
        cls.model = try_cached_model(DEFAULT_DEEPSEEK_W4AFP8_MODEL_FOR_TEST)
        cls.base_url = DEFAULT_URL_FOR_TEST
        launch_flags = ["--trust-remote-code", "--tp", "8", "--ep-size", "8"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=launch_flags,
        )

    @classmethod
    def tearDownClass(cls):
        """Terminate the server together with any child processes."""
        kill_process_tree(cls.process.pid)

    def test_gsm8k(self):
        """Run 5-shot GSM8K over 1200 questions and require accuracy > 0.92."""
        # The port is whatever follows the last ':' in the base URL.
        server_port = int(self.base_url.split(":")[-1])
        eval_config = {
            "num_shots": 5,
            "data_path": None,
            "num_questions": 1200,
            "parallel": 1200,
            "max_new_tokens": 512,
            "host": "http://127.0.0.1",
            "port": server_port,
        }
        # `metrics` must keep this name: the f-string below uses the `=`
        # specifier, which prints the expression text.
        metrics = run_eval_few_shot_gsm8k(SimpleNamespace(**eval_config))
        print(f"Eval accuracy of GSM8K: {metrics=}")

        self.assertGreater(metrics["accuracy"], 0.92)
|
||||
|
||||
|
||||
class TestDeepseekV3W4Afp8Mtp(CustomTestCase):
    """GSM8K test for the DeepSeek W4AFP8 model with EAGLE speculative decoding.

    Besides accuracy, the test checks the average speculative accept length
    reported by the server's ``/get_server_info`` endpoint.
    """

    @classmethod
    def setUpClass(cls):
        """Resolve the model and start the server with speculative decoding flags."""
        cls.model = try_cached_model(DEFAULT_DEEPSEEK_W4AFP8_MODEL_FOR_TEST)
        cls.base_url = DEFAULT_URL_FOR_TEST
        other_args = [
            "--tp",
            "8",
            "--trust-remote-code",
            "--ep-size",
            "8",
            "--cuda-graph-bs",
            "256",
            "--disable-radix-cache",
            "--speculative-algorithm",
            "EAGLE",
            "--speculative-num-steps",
            "3",
            "--speculative-eagle-topk",
            "2",
            "--speculative-num-draft-tokens",
            "4",
        ]
        # The memory-fraction override is applied everywhere except AMD CI.
        if not is_in_amd_ci():
            other_args += ["--mem-frac", "0.7"]
        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=other_args,
        )

    @classmethod
    def tearDownClass(cls):
        """Terminate the server together with any child processes."""
        kill_process_tree(cls.process.pid)

    def test_gsm8k(self):
        """Run 5-shot GSM8K over 200 questions and check accuracy and accept length.

        Requires accuracy > 0.935 and an average speculative accept length
        > 2.9. When running in CI, both numbers are also written to the
        GitHub step summary.
        """
        args = SimpleNamespace(
            num_shots=5,
            data_path=None,
            num_questions=200,
            max_new_tokens=512,
            parallel=128,
            host="http://127.0.0.1",
            port=int(self.base_url.split(":")[-1]),
        )
        metrics = run_eval_few_shot_gsm8k(args)
        print(f"{metrics=}")

        # Bound the request so a wedged server fails the test instead of
        # hanging it, and surface HTTP errors directly rather than as a
        # JSON/KeyError further down.
        server_info = requests.get(self.base_url + "/get_server_info", timeout=60)
        server_info.raise_for_status()
        avg_spec_accept_length = server_info.json()["internal_states"][0][
            "avg_spec_accept_length"
        ]
        print(f"{avg_spec_accept_length=}")

        if is_in_ci():
            write_github_step_summary(
                f"### test_gsm8k (deepseek-v3 mtp)\n"
                f'{metrics["accuracy"]=:.3f}\n'
                f"{avg_spec_accept_length=:.2f}\n"
            )
        self.assertGreater(metrics["accuracy"], 0.935)
        self.assertGreater(avg_spec_accept_length, 2.9)
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@@ -143,6 +143,9 @@ suites = {
|
||||
"per-commit-8-gpu-deepep": [
|
||||
TestFile("ep/test_deepep_large.py", 338),
|
||||
],
|
||||
"per-commit-8-gpu-h20": [
|
||||
TestFile("quant/test_w4a8_deepseek_v3.py", 371),
|
||||
],
|
||||
"nightly": [
|
||||
TestFile("test_nightly_gsm8k_eval.py"),
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user