use sgl custom all reduce (#4441)

This commit is contained in:
Yineng Zhang
2025-03-18 00:46:41 -07:00
committed by GitHub
parent 45212ce18b
commit c787298547
3 changed files with 4 additions and 4 deletions

View File

@@ -221,9 +221,9 @@ jobs:
timeout-minutes: 10
run: |
cd test/srt
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
USE_VLLM_CUSTOM_ALLREDUCE=1 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
USE_VLLM_CUSTOM_ALLREDUCE=0 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
- name: Benchmark single latency + torch.compile (TP=2)
timeout-minutes: 10