Test no vllm custom allreduce (#4256)
This commit is contained in:
2
.github/workflows/pr-test.yml
vendored
2
.github/workflows/pr-test.yml
vendored
@@ -266,7 +266,7 @@ jobs:
|
||||
cd test/srt
|
||||
python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
||||
|
||||
# USE_VLLM_CUSTOM_ALLREDUCE=0 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
||||
USE_VLLM_CUSTOM_ALLREDUCE=0 python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_tp2_bs1
|
||||
|
||||
- name: Benchmark single latency + torch.compile (TP=2)
|
||||
timeout-minutes: 10
|
||||
|
||||
@@ -44,7 +44,7 @@ runtime_common = [
|
||||
|
||||
srt = [
|
||||
"sglang[runtime_common]",
|
||||
"sgl-kernel==0.0.4",
|
||||
"sgl-kernel==0.0.4.post1",
|
||||
"flashinfer_python==0.2.2.post1",
|
||||
"torch==2.5.1",
|
||||
"vllm>=0.6.4.post1,<=0.7.2",
|
||||
|
||||
@@ -480,7 +480,7 @@ class ServerArgs:
|
||||
"--chunked-prefill-size",
|
||||
type=int,
|
||||
default=ServerArgs.chunked_prefill_size,
|
||||
help="The maximum number of tokens in a chunk for the chunked prefill. Setting this to -1 means disabling chunked prefill",
|
||||
help="The maximum number of tokens in a chunk for the chunked prefill. Setting this to -1 means disabling chunked prefill.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--max-prefill-tokens",
|
||||
@@ -505,7 +505,7 @@ class ServerArgs:
|
||||
"--cpu-offload-gb",
|
||||
type=int,
|
||||
default=ServerArgs.cpu_offload_gb,
|
||||
help="How many GBs of RAM to reserve for CPU offloading",
|
||||
help="How many GBs of RAM to reserve for CPU offloading.",
|
||||
)
|
||||
|
||||
# Other runtime options
|
||||
|
||||
@@ -26,4 +26,4 @@ pip install transformers==4.45.2 sentence_transformers accelerate peft pandas da
|
||||
pip install cuda-python nvidia-cuda-nvrtc-cu12
|
||||
|
||||
# reinstall sgl-kernel
|
||||
pip install sgl-kernel==0.0.4 --force-reinstall --no-deps
|
||||
pip install sgl-kernel==0.0.4.post1 --force-reinstall --no-deps
|
||||
|
||||
Reference in New Issue
Block a user