release v0.1.10
This commit is contained in:
@@ -351,6 +351,7 @@ python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port
|
|||||||
```
|
```
|
||||||
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --mem-fraction-static 0.7
|
python -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000 --mem-fraction-static 0.7
|
||||||
```
|
```
|
||||||
|
- You can turn on [flashinfer](docs/flashinfer.md) to accelerate inference by using highly optimized CUDA kernels.
|
||||||
|
|
||||||
### Supported Models
|
### Supported Models
|
||||||
- Llama
|
- Llama
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "sglang"
|
name = "sglang"
|
||||||
version = "0.1.9"
|
version = "0.1.10"
|
||||||
description = "A structured generation language for LLMs."
|
description = "A structured generation language for LLMs."
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.8"
|
requires-python = ">=3.8"
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
__version__ = "0.1.9"
|
__version__ = "0.1.10"
|
||||||
|
|
||||||
from sglang.api import *
|
from sglang.api import *
|
||||||
from sglang.global_config import global_config
|
from sglang.global_config import global_config
|
||||||
|
|||||||
Reference in New Issue
Block a user