diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
index a7aa55cc9..36983115d 100644
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -221,9 +221,9 @@ jobs:
         timeout-minutes: 10
         run: |
           cd test/srt
-          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
+          USE_VLLM_CUSTOM_ALLREDUCE=1 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
 
-          USE_VLLM_CUSTOM_ALLREDUCE=0 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
+          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
 
       - name: Benchmark single latency + torch.compile (TP=2)
         timeout-minutes: 10
diff --git a/python/sglang/srt/_custom_ops.py b/python/sglang/srt/_custom_ops.py
index 0584dc80f..d0bc51261 100644
--- a/python/sglang/srt/_custom_ops.py
+++ b/python/sglang/srt/_custom_ops.py
@@ -10,7 +10,7 @@ from sglang.srt.utils import get_bool_env_var, is_hip, is_hpu
 
 logger = logging.getLogger(__name__)
 use_vllm_custom_allreduce = get_bool_env_var(
-    "USE_VLLM_CUSTOM_ALLREDUCE", default="true"
+    "USE_VLLM_CUSTOM_ALLREDUCE", default="false"
 )
 
 if not is_hpu():
diff --git a/test/srt/test_bench_one_batch.py b/test/srt/test_bench_one_batch.py
index e015da6a1..65c894b57 100644
--- a/test/srt/test_bench_one_batch.py
+++ b/test/srt/test_bench_one_batch.py
@@ -29,7 +29,7 @@ class TestBenchOneBatch(unittest.TestCase):
         )
 
         use_vllm_custom_allreduce = get_bool_env_var(
-            "USE_VLLM_CUSTOM_ALLREDUCE", default="true"
+            "USE_VLLM_CUSTOM_ALLREDUCE", default="false"
         )
 
         if is_in_ci():