Clean up unused headers in sgl_kernel (#2986)

This commit is contained in:
Yineng Zhang
2025-01-20 00:44:49 +08:00
committed by GitHub
parent def5c31873
commit a69cb5cff7
5 changed files with 12 additions and 26 deletions

View File

@@ -3,11 +3,8 @@
#pragma once
#include "cutlass/arch/memory.h"
#include "cutlass/arch/memory_sm75.h"
#include "cutlass/cutlass.h"
#include "cutlass/fast_math.h"
#include "cutlass/numeric_conversion.h"
#include <cutlass/arch/memory.h>
#include <cutlass/numeric_conversion.h>
namespace cutlass {
namespace epilogue {

View File

@@ -2,16 +2,9 @@
// https://github.com/NVIDIA/TensorRT-LLM/blob/be1788106245496872d18e702978e59b6bfd50e0/cpp/tensorrt_llm/cutlass_extensions/include/cutlass_extensions/gemm/device/gemm_universal_base_compat.h
#pragma once
#include "cutlass/arch/arch.h"
#include "cutlass/cutlass.h"
#include "cutlass/device_kernel.h"
#include "cutlass/gemm/device/default_gemm_configuration.h"
#include "cutlass/gemm/gemm.h"
#include "cutlass/gemm/kernel/default_gemm_universal.h"
#include "cutlass/gemm/kernel/gemm_universal.h"
#include "cutlass/gemm/threadblock/threadblock_swizzle.h"
#include "cutlass/numeric_types.h"
#include "cutlass/trace.h"
#include <cutlass/cutlass.h>
#include <cutlass/device_kernel.h>
#include <cutlass/trace.h>
////////////////////////////////////////////////////////////////////////////////

View File

@@ -3,14 +3,11 @@
#pragma once
#include "cutlass/complex.h"
#include "cutlass/cutlass.h"
#include "cutlass/fast_math.h"
#include "cutlass/gemm/gemm.h"
#include "cutlass/matrix_coord.h"
#include "cutlass/semaphore.h"
#include "cutlass/trace.h"
#include "cutlass_extensions/epilogue/epilogue_per_row_per_col_scale.h"
#include <cutlass/complex.h>
#include <cutlass/cutlass.h>
#include <cutlass/fast_math.h>
#include <cutlass/matrix_coord.h>
#include <cutlass/trace.h>
/////////////////////////////////////////////////////////////////////////////////////////////////

View File

@@ -1,3 +1,5 @@
#include <vector>
#include "utils.hpp"
// trt_reduce

View File

@@ -3,9 +3,6 @@
#include <c10/cuda/CUDAStream.h>
#include <cassert>
#include <iostream>
#include <sstream>
#include <unordered_map>
#include "trt_reduce_internal.cuh"