sglangv0.5.2 & support Qwen3-Next-80B-A3B-Instruct
This commit is contained in:
21
sgl-kernel/csrc/cutlass_extensions/common.hpp
Normal file
21
sgl-kernel/csrc/cutlass_extensions/common.hpp
Normal file
@@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include "cuda_runtime.h"
|
||||
#include "cutlass/cutlass.h"
|
||||
|
||||
/**
|
||||
* A wrapper for a kernel that is used to guard against compilation on
|
||||
* architectures that will never use the kernel. The purpose of this is to
|
||||
* reduce the size of the compiled binary.
|
||||
* __CUDA_ARCH__ is not defined in host code, so this lets us smuggle the ifdef
|
||||
* into code that will be executed on the device where it is defined.
|
||||
*/
|
||||
template <typename Kernel>
|
||||
struct enable_sm90_or_later : Kernel {
|
||||
template <typename... Args>
|
||||
CUTLASS_DEVICE void operator()(Args&&... args) {
|
||||
#if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 900
|
||||
Kernel::operator()(std::forward<Args>(args)...);
|
||||
#endif
|
||||
}
|
||||
};
|
||||
Reference in New Issue
Block a user