use sgl custom all reduce (#4441)
This commit is contained in:
4
.github/workflows/pr-test.yml
vendored
4
.github/workflows/pr-test.yml
vendored
@@ -221,9 +221,9 @@ jobs:
|
|||||||
timeout-minutes: 10
|
timeout-minutes: 10
|
||||||
run: |
|
run: |
|
||||||
cd test/srt
|
cd test/srt
|
||||||
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
USE_VLLM_CUSTOM_ALLREDUCE=1 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
||||||
|
|
||||||
USE_VLLM_CUSTOM_ALLREDUCE=0 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
||||||
|
|
||||||
- name: Benchmark single latency + torch.compile (TP=2)
|
- name: Benchmark single latency + torch.compile (TP=2)
|
||||||
timeout-minutes: 10
|
timeout-minutes: 10
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from sglang.srt.utils import get_bool_env_var, is_hip, is_hpu
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
use_vllm_custom_allreduce = get_bool_env_var(
|
use_vllm_custom_allreduce = get_bool_env_var(
|
||||||
"USE_VLLM_CUSTOM_ALLREDUCE", default="true"
|
"USE_VLLM_CUSTOM_ALLREDUCE", default="false"
|
||||||
)
|
)
|
||||||
|
|
||||||
if not is_hpu():
|
if not is_hpu():
|
||||||
|
|||||||
@@ -29,7 +29,7 @@ class TestBenchOneBatch(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
use_vllm_custom_allreduce = get_bool_env_var(
|
use_vllm_custom_allreduce = get_bool_env_var(
|
||||||
"USE_VLLM_CUSTOM_ALLREDUCE", default="true"
|
"USE_VLLM_CUSTOM_ALLREDUCE", default="false"
|
||||||
)
|
)
|
||||||
|
|
||||||
if is_in_ci():
|
if is_in_ci():
|
||||||
|
|||||||
Reference in New Issue
Block a user