[kernel] Integrate flashinfer's rope with higher precision and better perf (#3134)

This commit is contained in:
Byron Hsu
2025-01-26 23:28:00 -08:00
committed by GitHub
parent af02f99b7c
commit fb11a43981
8 changed files with 244 additions and 98 deletions

View File

@@ -94,6 +94,7 @@ sources = [
"3rdparty/flashinfer/csrc/norm.cu",
"3rdparty/flashinfer/csrc/sampling.cu",
"3rdparty/flashinfer/csrc/renorm.cu",
"3rdparty/flashinfer/csrc/rope.cu",
]
# Build-time toggle read from the environment: True only when
# SGL_KERNEL_ENABLE_BF16 is exactly the string "1"; unset (default "0")
# or any other value yields False.
enable_bf16 = "1" == os.environ.get("SGL_KERNEL_ENABLE_BF16", "0")