Update version to v0.1.13 (#280)
This commit is contained in:
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
||||
|
||||
[project]
|
||||
name = "sglang"
|
||||
version = "0.1.12"
|
||||
version = "0.1.13"
|
||||
description = "A structured generation language for LLMs."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.8"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
__version__ = "0.1.12"
|
||||
__version__ = "0.1.13"
|
||||
|
||||
from sglang.api import *
|
||||
from sglang.global_config import global_config
|
||||
|
||||
@@ -17,7 +17,7 @@ class RadixAttention(nn.Module):
|
||||
|
||||
from sglang.srt.managers.router.model_runner import global_server_args_dict
|
||||
|
||||
if global_server_args_dict["enable_flashinfer"]:
|
||||
if global_server_args_dict.get("enable_flashinfer", False):
|
||||
self.prefill_forward = self.prefill_forward_flashinfer
|
||||
self.extend_forward = self.prefill_forward_flashinfer
|
||||
self.decode_forward = self.decode_forward_flashinfer
|
||||
|
||||
@@ -7,7 +7,7 @@ import triton.language as tl
|
||||
from sglang.srt.managers.router.model_runner import global_server_args_dict
|
||||
from sglang.srt.utils import wrap_kernel_launcher
|
||||
|
||||
if global_server_args_dict["attention_reduce_in_fp32"]:
|
||||
if global_server_args_dict.get("attention_reduce_in_fp32", False):
|
||||
REDUCE_TRITON_TYPE = tl.float32
|
||||
REDUCE_TORCH_TYPE = torch.float32
|
||||
else:
|
||||
|
||||
@@ -222,7 +222,7 @@ class InputMetadata:
|
||||
if forward_mode == ForwardMode.EXTEND:
|
||||
ret.init_extend_args()
|
||||
|
||||
if global_server_args_dict["enable_flashinfer"]:
|
||||
if global_server_args_dict.get("enable_flashinfer", False):
|
||||
ret.init_flashinfer_args(tp_size)
|
||||
|
||||
return ret
|
||||
|
||||
Reference in New Issue
Block a user