Test no vllm custom allreduce (#4256)

This commit is contained in:
Lianmin Zheng
2025-03-10 10:08:25 -07:00
committed by GitHub
parent cf0ccd406e
commit 5a6400eec5
4 changed files with 5 additions and 5 deletions

View File

@@ -44,7 +44,7 @@ runtime_common = [
srt = [
"sglang[runtime_common]",
-    "sgl-kernel==0.0.4",
+    "sgl-kernel==0.0.4.post1",
"flashinfer_python==0.2.2.post1",
"torch==2.5.1",
"vllm>=0.6.4.post1,<=0.7.2",

View File

@@ -480,7 +480,7 @@ class ServerArgs:
"--chunked-prefill-size",
type=int,
default=ServerArgs.chunked_prefill_size,
-        help="The maximum number of tokens in a chunk for the chunked prefill. Setting this to -1 means disabling chunked prefill",
+        help="The maximum number of tokens in a chunk for the chunked prefill. Setting this to -1 means disabling chunked prefill.",
)
parser.add_argument(
"--max-prefill-tokens",
@@ -505,7 +505,7 @@ class ServerArgs:
"--cpu-offload-gb",
type=int,
default=ServerArgs.cpu_offload_gb,
-        help="How many GBs of RAM to reserve for CPU offloading",
+        help="How many GBs of RAM to reserve for CPU offloading.",
)
# Other runtime options