Fix fast decode plan for flashinfer v0.4.0rc1 and upgrade sgl-kernel 0.3.11 (#10634)
Co-authored-by: zhyncs <me@zhyncs.com>
This commit is contained in:
@@ -57,12 +57,12 @@ dependencies = [
|
||||
"uvicorn",
|
||||
"uvloop",
|
||||
"xgrammar==0.1.24",
|
||||
"sgl-kernel==0.3.10",
|
||||
"sgl-kernel==0.3.11",
|
||||
"torch==2.8.0",
|
||||
"torchaudio==2.8.0",
|
||||
"torchvision",
|
||||
"cuda-python",
|
||||
"flashinfer_python==0.3.1",
|
||||
"flashinfer_python==0.4.0rc1",
|
||||
"openai==1.99.1",
|
||||
"tiktoken",
|
||||
"anthropic>=0.20.0",
|
||||
|
||||
@@ -65,7 +65,7 @@ tracing = [
|
||||
|
||||
srt = [
|
||||
"sglang[runtime_common]",
|
||||
"sgl-kernel==0.3.10",
|
||||
"sgl-kernel==0.3.11",
|
||||
"torch==2.8.0",
|
||||
"torchaudio==2.8.0",
|
||||
"torchvision",
|
||||
@@ -75,7 +75,7 @@ srt = [
|
||||
|
||||
blackwell = [
|
||||
"sglang[runtime_common]",
|
||||
"sgl-kernel==0.3.10",
|
||||
"sgl-kernel==0.3.11",
|
||||
"torch==2.8.0",
|
||||
"torchaudio==2.8.0",
|
||||
"torchvision",
|
||||
|
||||
@@ -703,7 +703,7 @@ def _set_envs_and_config(server_args: ServerArgs):
|
||||
if server_args.attention_backend == "flashinfer":
|
||||
assert_pkg_version(
|
||||
"flashinfer_python",
|
||||
"0.3.1",
|
||||
"0.4.0rc1",
|
||||
"Please uninstall the old version and "
|
||||
"reinstall the latest version by following the instructions "
|
||||
"at https://docs.flashinfer.ai/installation.html.",
|
||||
@@ -711,7 +711,7 @@ def _set_envs_and_config(server_args: ServerArgs):
|
||||
if _is_cuda and not get_bool_env_var("SGLANG_SKIP_SGL_KERNEL_VERSION_CHECK"):
|
||||
assert_pkg_version(
|
||||
"sgl-kernel",
|
||||
"0.3.10",
|
||||
"0.3.11",
|
||||
"Please reinstall the latest version with `pip install sgl-kernel --force-reinstall`",
|
||||
)
|
||||
|
||||
|
||||
@@ -1432,6 +1432,9 @@ def fast_decode_plan(
|
||||
head_dim,
|
||||
head_dim,
|
||||
False, # causal
|
||||
window_left,
|
||||
-1,
|
||||
False,
|
||||
)
|
||||
except Exception as e:
|
||||
raise RuntimeError(f"Error in standard plan: {e}")
|
||||
|
||||
Reference in New Issue
Block a user