Simplify tests & Fix trtllm custom allreduce registration (#4252)

This commit is contained in:
Lianmin Zheng
2025-03-10 01:24:22 -07:00
committed by GitHub
parent 007f8b3dc2
commit aa957102a9
13 changed files with 30 additions and 211 deletions

View File

@@ -3,6 +3,7 @@ import unittest
 from sglang.test.test_utils import (
 DEFAULT_MODEL_NAME_FOR_TEST,
 DEFAULT_MOE_MODEL_NAME_FOR_TEST,
+get_bool_env_var,
 is_in_ci,
 run_bench_one_batch,
 write_github_step_summary,
@@ -27,9 +28,13 @@ class TestBenchOneBatch(unittest.TestCase):
 DEFAULT_MOE_MODEL_NAME_FOR_TEST, ["--tp", "2", "--cuda-graph-max-bs", "2"]
 )
+use_vllm_custom_allreduce = get_bool_env_var(
+"USE_VLLM_CUSTOM_ALLREDUCE", default="true"
+)
 if is_in_ci():
 write_github_step_summary(
-f"### test_moe_tp2_bs1\n"
+f"### test_moe_tp2_bs1 ({use_vllm_custom_allreduce=})\n"
 f"output_throughput : {output_throughput:.2f} token/s\n"
 )
 self.assertGreater(output_throughput, 124)