Update version to v0.1.13 (#280)

This commit is contained in:
Lianmin Zheng
2024-03-11 05:49:27 -07:00
committed by GitHub
parent 13662fd533
commit 4aa5dd2c5f
11 changed files with 35 additions and 21 deletions

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "sglang"
version = "0.1.12"
version = "0.1.13"
description = "A structured generation language for LLMs."
readme = "README.md"
requires-python = ">=3.8"

View File

@@ -1,4 +1,4 @@
__version__ = "0.1.12"
__version__ = "0.1.13"
from sglang.api import *
from sglang.global_config import global_config

View File

@@ -17,7 +17,7 @@ class RadixAttention(nn.Module):
from sglang.srt.managers.router.model_runner import global_server_args_dict
if global_server_args_dict["enable_flashinfer"]:
if global_server_args_dict.get("enable_flashinfer", False):
self.prefill_forward = self.prefill_forward_flashinfer
self.extend_forward = self.prefill_forward_flashinfer
self.decode_forward = self.decode_forward_flashinfer

View File

@@ -7,7 +7,7 @@ import triton.language as tl
from sglang.srt.managers.router.model_runner import global_server_args_dict
from sglang.srt.utils import wrap_kernel_launcher
if global_server_args_dict["attention_reduce_in_fp32"]:
if global_server_args_dict.get("attention_reduce_in_fp32", False):
REDUCE_TRITON_TYPE = tl.float32
REDUCE_TORCH_TYPE = torch.float32
else:

View File

@@ -222,7 +222,7 @@ class InputMetadata:
if forward_mode == ForwardMode.EXTEND:
ret.init_extend_args()
if global_server_args_dict["enable_flashinfer"]:
if global_server_args_dict.get("enable_flashinfer", False):
ret.init_flashinfer_args(tp_size)
return ret