Release v0.3.6.post2 (#2214)

Co-authored-by: Yineng Zhang <me@zhyncs.com>
2024-11-27 03:35:30 -08:00
parent fb6e04a0c2
commit fed4c6946a
9 changed files with 16 additions and 17 deletions
--- a/python/pyproject.toml
+++ b/python/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "sglang"
-version = "0.3.6.post1"
+version = "0.3.6.post2"
 description = "SGLang is yet another fast serving framework for large language models and vision language models."
 readme = "README.md"
 requires-python = ">=3.8"
--- a/python/sglang/bench_one_batch.py
+++ b/python/sglang/bench_one_batch.py
@@ -466,7 +466,6 @@ if __name__ == "__main__":

    try:
        main(server_args, bench_args)
-    except Exception as e:
-        raise e
    finally:
-        kill_child_process()
+        if server_args.tp_size != 1:
+            kill_child_process()
--- a/python/sglang/launch_server.py
+++ b/python/sglang/launch_server.py
@@ -11,7 +11,5 @@ if __name__ == "__main__":

    try:
        launch_server(server_args)
-    except Exception as e:
-        raise e
    finally:
        kill_child_process()
--- a/python/sglang/srt/utils.py
+++ b/python/sglang/srt/utils.py
@@ -517,6 +517,11 @@ def monkey_patch_vllm_p2p_access_check(gpu_id: int):

    setattr(tgt, "gpu_p2p_access_check", lambda *arg, **kwargs: True)

+    # Suppress the warnings emitted by CustomAllreduce.__del__ when using sglang.bench_one_batch
+    from vllm.distributed.device_communicators.custom_all_reduce import CustomAllreduce
+
+    setattr(CustomAllreduce, "__del__", lambda *args, **kwargs: None)
+

 vllm_all_gather_backup = None

--- a/python/sglang/version.py
+++ b/python/sglang/version.py
@@ -1 +1 @@
-__version__ = "0.3.6.post1"
+__version__ = "0.3.6.post2"