Test no vllm custom allreduce (#4256)
This commit is contained in:
@@ -44,7 +44,7 @@ runtime_common = [
 srt = [
     "sglang[runtime_common]",
-    "sgl-kernel==0.0.4",
+    "sgl-kernel==0.0.4.post1",
     "flashinfer_python==0.2.2.post1",
     "torch==2.5.1",
     "vllm>=0.6.4.post1,<=0.7.2",
@@ -480,7 +480,7 @@ class ServerArgs:
             "--chunked-prefill-size",
             type=int,
             default=ServerArgs.chunked_prefill_size,
-            help="The maximum number of tokens in a chunk for the chunked prefill. Setting this to -1 means disabling chunked prefill",
+            help="The maximum number of tokens in a chunk for the chunked prefill. Setting this to -1 means disabling chunked prefill.",
         )
         parser.add_argument(
             "--max-prefill-tokens",
@@ -505,7 +505,7 @@ class ServerArgs:
             "--cpu-offload-gb",
             type=int,
             default=ServerArgs.cpu_offload_gb,
-            help="How many GBs of RAM to reserve for CPU offloading",
+            help="How many GBs of RAM to reserve for CPU offloading.",
         )

         # Other runtime options
Reference in New Issue
Block a user