use sgl custom all reduce (#4441)
This commit is contained in:
4
.github/workflows/pr-test.yml
vendored
4
.github/workflows/pr-test.yml
vendored
@@ -221,9 +221,9 @@ jobs:
|
||||
timeout-minutes: 10
|
||||
run: |
|
||||
cd test/srt
|
||||
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
||||
USE_VLLM_CUSTOM_ALLREDUCE=1 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
||||
|
||||
USE_VLLM_CUSTOM_ALLREDUCE=0 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
||||
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
||||
|
||||
- name: Benchmark single latency + torch.compile (TP=2)
|
||||
timeout-minutes: 10
|
||||
|
||||
@@ -10,7 +10,7 @@ from sglang.srt.utils import get_bool_env_var, is_hip, is_hpu
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
use_vllm_custom_allreduce = get_bool_env_var(
|
||||
"USE_VLLM_CUSTOM_ALLREDUCE", default="true"
|
||||
"USE_VLLM_CUSTOM_ALLREDUCE", default="false"
|
||||
)
|
||||
|
||||
if not is_hpu():
|
||||
|
||||
@@ -29,7 +29,7 @@ class TestBenchOneBatch(unittest.TestCase):
|
||||
)
|
||||
|
||||
use_vllm_custom_allreduce = get_bool_env_var(
|
||||
"USE_VLLM_CUSTOM_ALLREDUCE", default="true"
|
||||
"USE_VLLM_CUSTOM_ALLREDUCE", default="false"
|
||||
)
|
||||
|
||||
if is_in_ci():
|
||||
|
||||
Reference in New Issue
Block a user