Allow disabling flashinfer sampling kernel (#778)

This commit is contained in:
Lianmin Zheng
2024-07-27 20:18:56 -07:00
committed by GitHub
parent 30db99b3d9
commit 752e643007
6 changed files with 41 additions and 26 deletions

View File

@@ -7,8 +7,11 @@ from torch import nn
from sglang.global_config import global_config
from sglang.srt.layers.extend_attention import extend_attention_fwd
from sglang.srt.layers.token_attention import token_attention_fwd
-from sglang.srt.managers.controller.model_runner import ForwardMode, InputMetadata
-from sglang.srt.server import global_server_args_dict
+from sglang.srt.managers.controller.model_runner import (
+    ForwardMode,
+    InputMetadata,
+    global_server_args_dict,
+)
class RadixAttention(nn.Module):