将分散在各文件中的CUDA/HIP/MUSA硬件相关头文件引用统一到vendors目录下的对应头文件中,提高代码可维护性。移除重复的头文件引用,优化构建配置。
10 lines
497 B
Plaintext
10 lines
497 B
Plaintext
#include "../../../vendors/functions.h"
|
|
#include "quantization/w8a8/per_token_group_quant_8bit.h"
|
|
|
|
// Entry point that delegates to per_token_group_quant_8bit.
//
// Every argument is forwarded unchanged and in the same order; int8_min and
// int8_max are passed as the final two arguments of the callee. This function
// performs no validation or computation of its own.
//
// input      - forwarded as-is (taken by const reference here).
// output_q   - forwarded as-is; taken by mutable reference, so presumably
//              written by the callee (confirm against its declaration).
// output_s   - forwarded as-is; same note as output_q.
// group_size - forwarded as-is.
// eps        - forwarded as-is.
// int8_min   - forwarded as the callee's second-to-last argument.
// int8_max   - forwarded as the callee's last argument.
void per_token_group_quant_int8(
    const torch::Tensor& input,
    torch::Tensor& output_q,
    torch::Tensor& output_s,
    int64_t group_size,
    double eps,
    double int8_min,
    double int8_max) {
  per_token_group_quant_8bit(
      input,
      output_q,
      output_s,
      group_size,
      eps,
      int8_min,
      int8_max);
}