[kernel] Integrate flashinfer's rope with higher precision and better perf (#3134)
This commit is contained in:
@@ -94,6 +94,7 @@ sources = [
|
||||
"3rdparty/flashinfer/csrc/norm.cu",
|
||||
"3rdparty/flashinfer/csrc/sampling.cu",
|
||||
"3rdparty/flashinfer/csrc/renorm.cu",
|
||||
"3rdparty/flashinfer/csrc/rope.cu",
|
||||
]
|
||||
|
||||
enable_bf16 = os.getenv("SGL_KERNEL_ENABLE_BF16", "0") == "1"
|
||||
|
||||
Reference in New Issue
Block a user