[1/N] Support DeepSeek-R1 w4a8 normal deepep (#8247)

Co-authored-by: Hank Han <hanhan7630@outlook.com>
This commit is contained in:
Jinwu
2025-10-15 11:10:53 +08:00
committed by GitHub
parent a40229f6f8
commit 825432fce6
7 changed files with 334 additions and 7 deletions

View File

@@ -118,5 +118,60 @@ class TestDeepseekV3W4Afp8Mtp(CustomTestCase):
self.assertGreater(avg_spec_accept_length, 2.9)
class TestDeepseekV3W4Afp8DeepepNormal(CustomTestCase):
    """GSM8K accuracy check for the W4AFP8 test model served with DeepEP in normal mode.

    One server process is launched for the whole class in ``setUpClass`` and
    its process tree is killed in ``tearDownClass``.
    """

    @classmethod
    def setUpClass(cls):
        """Resolve the model, build the launch flags and start the server."""
        cls.model = try_cached_model(DEFAULT_DEEPSEEK_W4AFP8_MODEL_FOR_TEST)
        cls.base_url = DEFAULT_URL_FOR_TEST

        # Each flag is kept next to its value; the overall order matches the
        # order the arguments are handed to the server launcher.
        launch_flags = [
            "--tp", "8",
            "--trust-remote-code",
            "--ep-size", "8",
            "--cuda-graph-bs", "256",
            "--disable-radix-cache",
            "--moe-a2a-backend", "deepep",
            "--deepep-mode", "normal",
            "--dp", "8",
            "--enable-dp-attention",
            "--moe-runner-backend", "cutlass",
        ]
        # The memory-fraction cap is only passed when is_in_amd_ci() is false.
        if not is_in_amd_ci():
            launch_flags.extend(["--mem-frac", "0.7"])

        cls.process = popen_launch_server(
            cls.model,
            cls.base_url,
            timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
            other_args=launch_flags,
        )

    @classmethod
    def tearDownClass(cls):
        """Kill the server process tree started in ``setUpClass``."""
        kill_process_tree(cls.process.pid)

    def test_gsm8k(self):
        """Run the 5-shot GSM8K eval on 200 questions and require accuracy > 0.92."""
        # The port is whatever follows the last ':' in the base URL.
        server_port = int(self.base_url.rsplit(":", 1)[-1])
        eval_config = SimpleNamespace(
            num_shots=5,
            data_path=None,
            num_questions=200,
            max_new_tokens=512,
            parallel=128,
            host="http://127.0.0.1",
            port=server_port,
        )

        # NOTE: the local must stay named `metrics`; the f-string below uses
        # the self-documenting `{metrics=}` form, which prints the name.
        metrics = run_eval_few_shot_gsm8k(eval_config)
        print(f"Eval accuracy of GSM8K: {metrics=}")
        self.assertGreater(metrics["accuracy"], 0.92)
# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()