From a49dc52bfa1e04f7713644266b0992e8e977a2bb Mon Sep 17 00:00:00 2001 From: Lianmin Zheng Date: Tue, 30 Jan 2024 15:37:43 +0000 Subject: [PATCH] release v0.1.10 --- README.md | 1 + python/pyproject.toml | 2 +- python/sglang/__init__.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7218f10b8..b21e677c4 100644 --- a/README.md +++ b/README.md @@ -351,6 +351,7 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port ``` python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --mem-fraction-static 0.7 ``` +- You can turn on [flashinfer](docs/flashinfer.md) to accelerate the inference by using highly optimized CUDA kernels. ### Supported Models - Llama diff --git a/python/pyproject.toml b/python/pyproject.toml index 6a133b54a..6fb94df40 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "sglang" -version = "0.1.9" +version = "0.1.10" description = "A structured generation langauge for LLMs." readme = "README.md" requires-python = ">=3.8" diff --git a/python/sglang/__init__.py b/python/sglang/__init__.py index f0bd5ce75..2d01bcb03 100644 --- a/python/sglang/__init__.py +++ b/python/sglang/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.1.9" +__version__ = "0.1.10" from sglang.api import * from sglang.global_config import global_config