Use sgl custom all-reduce by default (#4441)

2025-03-18 00:46:41 -07:00
parent 45212ce18b
commit c787298547
3 changed files with 4 additions and 4 deletions
--- a/.github/workflows/pr-test.yml
+++ b/.github/workflows/pr-test.yml
@@ -221,9 +221,9 @@ jobs:
        timeout-minutes: 10
        run: |
          cd test/srt
-          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
+          USE_VLLM_CUSTOM_ALLREDUCE=1 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
-          USE_VLLM_CUSTOM_ALLREDUCE=0 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
+          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
      - name: Benchmark single latency + torch.compile (TP=2)
        timeout-minutes: 10
--- a/python/sglang/srt/_custom_ops.py
+++ b/python/sglang/srt/_custom_ops.py
@@ -10,7 +10,7 @@ from sglang.srt.utils import get_bool_env_var, is_hip, is_hpu
 logger = logging.getLogger(__name__)
 use_vllm_custom_allreduce = get_bool_env_var(
-    "USE_VLLM_CUSTOM_ALLREDUCE", default="true"
+    "USE_VLLM_CUSTOM_ALLREDUCE", default="false"
 )
 if not is_hpu():
--- a/test/srt/test_bench_one_batch.py
+++ b/test/srt/test_bench_one_batch.py
@@ -29,7 +29,7 @@ class TestBenchOneBatch(unittest.TestCase):
        )
        use_vllm_custom_allreduce = get_bool_env_var(
-            "USE_VLLM_CUSTOM_ALLREDUCE", default="true"
+            "USE_VLLM_CUSTOM_ALLREDUCE", default="false"
        )
        if is_in_ci():