[sgl-kernel] 1/N Refactor sglang cutlass 3x - gemm fp8 blockwise sm90 (#8913)

Co-authored-by: luoyuan.luo <luoyuan.luo@antgroup.com>
2025-08-15 01:55:54 +08:00
parent 1fea998a45
commit 432f2053dd
5 changed files with 322 additions and 151 deletions
--- a/sgl-kernel/csrc/cutlass_extensions/common.hpp
+++ b/sgl-kernel/csrc/cutlass_extensions/common.hpp
@@ -0,0 +1,21 @@
+#pragma once
+
+#include "cuda_runtime.h"
+#include "cutlass/cutlass.h"
+
+/**
+ * Wraps a kernel functor so that its body is only compiled on device passes
+ * targeting SM90 or newer, which keeps never-used code out of the binary.
+ * __CUDA_ARCH__ is not defined in host code, so this lets us smuggle the ifdef
+ * into code that will be executed on the device where it is defined.
+ * When the check fails, operator() has an empty body and calling it is a no-op.
+ */
+template <typename Kernel>
+struct enable_sm90_or_later : Kernel {  // shadows Kernel::operator() with the guarded one below
+  template <typename... Args>
+  CUTLASS_DEVICE void operator()(Args&&... args) {
+#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 900
+    Kernel::operator()(std::forward<Args>(args)...);  // perfect-forward to the wrapped kernel
+#endif  // __CUDA_ARCH__ >= 900; otherwise nothing is emitted here
+  }
+};