diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml index a7aa55cc9..36983115d 100644 --- a/.github/workflows/pr-test.yml +++ b/.github/workflows/pr-test.yml @@ -221,9 +221,9 @@ jobs: timeout-minutes: 10 run: | cd test/srt - python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1 + USE_VLLM_CUSTOM_ALLREDUCE=1 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1 - USE_VLLM_CUSTOM_ALLREDUCE=0 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1 + python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1 - name: Benchmark single latency + torch.compile (TP=2) timeout-minutes: 10 diff --git a/python/sglang/srt/_custom_ops.py b/python/sglang/srt/_custom_ops.py index 0584dc80f..d0bc51261 100644 --- a/python/sglang/srt/_custom_ops.py +++ b/python/sglang/srt/_custom_ops.py @@ -10,7 +10,7 @@ from sglang.srt.utils import get_bool_env_var, is_hip, is_hpu logger = logging.getLogger(__name__) use_vllm_custom_allreduce = get_bool_env_var( - "USE_VLLM_CUSTOM_ALLREDUCE", default="true" + "USE_VLLM_CUSTOM_ALLREDUCE", default="false" ) if not is_hpu(): diff --git a/test/srt/test_bench_one_batch.py b/test/srt/test_bench_one_batch.py index e015da6a1..65c894b57 100644 --- a/test/srt/test_bench_one_batch.py +++ b/test/srt/test_bench_one_batch.py @@ -29,7 +29,7 @@ class TestBenchOneBatch(unittest.TestCase): ) use_vllm_custom_allreduce = get_bool_env_var( - "USE_VLLM_CUSTOM_ALLREDUCE", default="true" + "USE_VLLM_CUSTOM_ALLREDUCE", default="false" ) if is_in_ci():