refactor: 统一硬件相关头文件引用
将分散在各文件中的CUDA/HIP/MUSA硬件相关头文件引用统一到vendors目录下的对应头文件中,提高代码可维护性。移除重复的头文件引用,优化构建配置。
This commit is contained in:
@@ -1,9 +1,7 @@
|
||||
// see csrc/quantization/w8a8/cutlass/moe/get_group_starts.cuh
|
||||
#pragma once
|
||||
|
||||
#include <cuda.h>
|
||||
#include <torch/all.h>
|
||||
#include <c10/cuda/CUDAStream.h>
|
||||
#include "../../vendors/functions.h"
|
||||
|
||||
#include "core/scalar_type.hpp"
|
||||
#include "cutlass/bfloat16.h"
|
||||
|
||||
@@ -14,9 +14,8 @@
|
||||
#include "cutlass/util/mixed_dtype_utils.hpp"
|
||||
|
||||
// vllm includes
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <c10/cuda/CUDAGuard.h>
|
||||
#include <torch/all.h>
|
||||
#include "../../vendors/functions.h"
|
||||
|
||||
#include "cutlass_extensions/torch_utils.hpp"
|
||||
#include "cutlass_extensions/common.hpp"
|
||||
|
||||
|
||||
@@ -3,9 +3,10 @@
|
||||
// https://github.com/NVIDIA/cutlass/blob/main/examples/55_hopper_mixed_dtype_gemm/55_hopper_int4_fp8_gemm.cu
|
||||
//
|
||||
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <c10/cuda/CUDAGuard.h>
|
||||
#include <torch/all.h>
|
||||
|
||||
#include "../../vendors/functions.h"
|
||||
|
||||
|
||||
#include "cutlass_extensions/torch_utils.hpp"
|
||||
#include "w4a8_utils.cuh"
|
||||
|
||||
@@ -26,7 +27,6 @@
|
||||
#include "cutlass_extensions/common.hpp"
|
||||
#include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp"
|
||||
|
||||
#include <cuda_runtime.h>
|
||||
|
||||
namespace vllm::cutlass_w4a8 {
|
||||
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
#include "w4a8_utils.cuh"
|
||||
|
||||
|
||||
#include "../../vendors/functions.h"
|
||||
|
||||
|
||||
#include <array>
|
||||
#include <cuda_runtime.h>
|
||||
#include <cstdio>
|
||||
|
||||
namespace vllm::cutlass_w4a8_utils {
|
||||
|
||||
Reference in New Issue
Block a user