Turn on flashinfer by default (#578)

This commit is contained in:
Ying Sheng
2024-07-02 02:25:07 -07:00
committed by GitHub
parent 95dc093b19
commit 9380f50ff9
5 changed files with 14 additions and 27 deletions

View File

@@ -26,7 +26,7 @@ class RadixAttention(nn.Module):
from sglang.srt.managers.controller.model_runner import global_server_args_dict
-if global_server_args_dict.get("enable_flashinfer", False):
+if not global_server_args_dict.get("disable_flashinfer", False):
self.prefill_forward = self.prefill_forward_flashinfer
self.extend_forward = self.prefill_forward_flashinfer
self.decode_forward = self.decode_forward_flashinfer