diff --git a/docs/release_process.md b/docs/release_process.md index 1bab0fd14..e817a256e 100644 --- a/docs/release_process.md +++ b/docs/release_process.md @@ -1,3 +1,10 @@ +# PyPI Package Release Process + +## Update the version in code +Update the package version in `python/pyproject.toml` and `python/sglang/__init__.py`. + +## Upload the PyPI package + ``` pip install build twine ``` @@ -5,4 +12,7 @@ pip install build twine ``` cd python bash upload_pypi.sh -``` \ No newline at end of file +``` + +## Make a release in GitHub +Make a new release https://github.com/sgl-project/sglang/releases/new. diff --git a/python/sglang/srt/server.py b/python/sglang/srt/server.py index 78bd2e0d1..045609b34 100644 --- a/python/sglang/srt/server.py +++ b/python/sglang/srt/server.py @@ -152,7 +152,9 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg if server_args.disable_disk_cache: disable_cache() if not server_args.disable_flashinfer: - assert_pkg_version("flashinfer", "0.0.8") + assert_pkg_version("flashinfer", "0.0.8", "Please uninstall the old version and " + "reinstall the latest version by following the instructions " + "at https://docs.flashinfer.ai/installation.html.") if server_args.chat_template: # TODO: replace this with huggingface transformers template load_chat_template_for_openai_api(server_args.chat_template) @@ -293,7 +295,7 @@ def launch_server(server_args: ServerArgs, pipe_finish_writer, model_overide_arg app, host=server_args.host, port=server_args.port, - log_level=server_args.log_level_http or server_args.log_level_http, + log_level=server_args.log_level_http or server_args.log_level, timeout_keep_alive=5, loop="uvloop", ) diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index c47b90cc6..a1b8014bc 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -50,11 +50,11 @@ class ServerArgs: load_balance_method: str = "round_robin" # Optimization/debug options - disable_flashinfer: bool = True - attention_reduce_in_fp32: bool = False + disable_flashinfer: bool = False disable_radix_cache: bool = False disable_regex_jump_forward: bool = False disable_disk_cache: bool = False + attention_reduce_in_fp32: bool = False # Distributed args nccl_init_addr: Optional[str] = None diff --git a/python/sglang/srt/utils.py b/python/sglang/srt/utils.py index 94cabee50..6da86cbeb 100644 --- a/python/sglang/srt/utils.py +++ b/python/sglang/srt/utils.py @@ -426,17 +426,19 @@ def suppress_other_loggers(): logging.getLogger("vllm.utils").setLevel(logging.WARN) -def assert_pkg_version(pkg: str, min_version: str): +def assert_pkg_version(pkg: str, min_version: str, message: str): try: installed_version = version(pkg) if pkg_version.parse(installed_version) < pkg_version.parse(min_version): raise Exception( - f"{pkg} is installed with version {installed_version} which " - f"is less than the minimum required version {min_version}" + f"{pkg} is installed with version {installed_version}, which " + f"is less than the minimum required version {min_version}. " + + message ) except PackageNotFoundError: raise Exception( - f"{pkg} with minimum required version {min_version} is not installed" + f"{pkg} with minimum required version {min_version} is not installed. " + + message ) @@ -588,4 +590,4 @@ def receive_addrs(model_port_args, server_args): print(f"Node 0 received from rank {src_rank}: {tensor.tolist()}") dist.barrier() - dist.destroy_process_group() \ No newline at end of file + dist.destroy_process_group()