chore: upgrade flashinfer to 0.4.0 (#11364)

This commit is contained in:
Yineng Zhang
2025-10-09 14:17:54 -07:00
committed by GitHub
parent 88bb627d0d
commit 44cb060785
5 changed files with 6 additions and 4 deletions

View File

@@ -24,7 +24,7 @@ dependencies = [
 "datasets",
 "einops",
 "fastapi",
-"flashinfer_python==0.4.0rc3",
+"flashinfer_python==0.4.0",
 "hf_transfer",
 "huggingface_hub",
 "interegular",

View File

@@ -70,7 +70,7 @@ srt = [
 "torchaudio==2.8.0",
 "torchvision",
 "cuda-python",
-"flashinfer_python==0.4.0rc3",
+"flashinfer_python==0.4.0",
 ]
 # HIP (Heterogeneous-computing Interface for Portability) for AMD

View File

@@ -703,7 +703,7 @@ def _set_envs_and_config(server_args: ServerArgs):
 if server_args.attention_backend == "flashinfer":
 assert_pkg_version(
 "flashinfer_python",
-"0.4.0rc3",
+"0.4.0",
 "Please uninstall the old version and "
 "reinstall the latest version by following the instructions "
 "at https://docs.flashinfer.ai/installation.html.",

View File

@@ -1060,7 +1060,7 @@ def fast_mla_decode_plan(
 try:
 # Standard version with just the required arguments (no use_profiler)
-self._cached_module.plan.default(
+self._cached_module.plan(
 self._float_workspace_buffer,
 self._int_workspace_buffer,
 self._pin_memory_int_workspace_buffer,

View File

@@ -74,3 +74,5 @@ fi
 # Show current packages
 $PIP_CMD list
 python3 -c "import torch; print(torch.version.cuda)"
+python3 -m flashinfer clear-cache