/*************************************************************************
* Copyright (C) [2023-2024] by Cambricon, Inc.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*************************************************************************/
#ifndef CSRC_COMMON_UTILS_H_
#define CSRC_COMMON_UTILS_H_
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <functional>
#include <future> // NOLINT
#include <initializer_list>
#include <iostream>
#include <map>
#include <memory>
#include <stdexcept>
#include <string>
#include <thread> // NOLINT
#include <tuple>
#include <utility>
#include <vector>
#include "cn_api.h"
#include "cnnl.h"
#include "cnnl_extra.h"
#include "cnrt.h"
#include "stack_exception.h"
namespace tmo {
// Maps a textual quantization-layout name to its CNNL enum value.
//
// Fix: the previous implementation used map::operator[], which on an unknown
// name silently inserted a value-initialized entry into the shared static map
// (mutating shared state, not thread-safe) and returned a bogus enum. Unknown
// names now throw TmoException. The parameter is also taken by const reference
// to avoid a copy; call sites are unaffected.
inline cnnlQuantizeLayout_t strToQuantizeLayout(const std::string &param) {
  static const std::map<std::string, cnnlQuantizeLayout_t> quantize_layout_map = {
      {"quantize_none", CNNL_QUANTIZE_NONE},
      {"quantize_per_tensor", CNNL_QUANTIZE_PER_TENSOR},
      {"quantize_per_channel", CNNL_QUANTIZE_PER_CHANNEL},
      {"quantize_per_token", CNNL_QUANTIZE_PER_TOKEN},
      {"quantize_group_wise", CNNL_QUANTIZE_GROUP_WISE}};
  const auto it = quantize_layout_map.find(param);
  if (it == quantize_layout_map.end()) {
    throw TmoException("Unknown quantize layout: " + param);
  }
  return it->second;
}
// Maps a textual activation name ("gelu", "relu", "sigmoid", "silu", "none")
// to its CNNL activation mode.
//
// Fix: map::operator[] previously inserted a default entry for unknown names
// (mutating the shared static map and returning a bogus enum); unknown names
// now throw TmoException. Parameter taken by const reference to avoid a copy.
inline cnnlActivationMode_t strToActivationMode(const std::string &param) {
  static const std::map<std::string, cnnlActivationMode_t> act_mode_map = {
      {"gelu", CNNL_ACTIVATION_GELU},
      {"relu", CNNL_ACTIVATION_RELU},
      {"sigmoid", CNNL_ACTIVATION_SIGMOID},
      {"silu", CNNL_ACTIVATION_SWISH},  // CNNL calls silu "swish"
      {"none", CNNL_ACTIVATION_IDENTITY}};
  const auto it = act_mode_map.find(param);
  if (it == act_mode_map.end()) {
    throw TmoException("Unknown activation mode: " + param);
  }
  return it->second;
}
// Maps a textual LLM quantization algorithm name to its CNNL enum value.
//
// Fix: map::operator[] previously inserted a default entry for unknown names
// (mutating the shared static map and returning a bogus enum); unknown names
// now throw TmoException. Parameter taken by const reference to avoid a copy.
inline cnnlLLMQuantAlgo_t strToQuantizeAlgo(const std::string &param) {
  static const std::map<std::string, cnnlLLMQuantAlgo_t> quant_algo_map = {
      {"weight_only", CNNL_WEIGHT_ONLY},
      {"smooth_quant", CNNL_SMOOTH_QUANT},
      {"none", CNNL_NO_QUANT}};
  const auto it = quant_algo_map.find(param);
  if (it == quant_algo_map.end()) {
    throw TmoException("Unknown quantize algo: " + param);
  }
  return it->second;
}
namespace lnres {
namespace internal {
using LnresEnum = cnnlTransformerLayernormResidualStructure_t;
// Decomposed form of cnnlTransformerLayernormResidualStructure_t: a
// (layernorm position, residual position) pair that is easier to reason about
// and manipulate than the flat CNNL enum.
struct Helper {
  int layernorm_position;  // NO(0): no layernorm, PRE(1): pre layernorm, POST(2): post layernorm
  int residual_position;   // NO(0): no residual, CONTAIN(1): layernorm inside residual,
                           // EXCLUDE(2): layernorm outside residual
  // Decodes a CNNL enum via the `pairs` table; defined out-of-line below,
  // after `from` is available.
  constexpr Helper(cnnlTransformerLayernormResidualStructure_t mode);
  constexpr Helper(int layernorm_position, int residual_position)
      : layernorm_position(layernorm_position), residual_position(residual_position) {}
  constexpr bool operator==(const Helper &other) const {
    return layernorm_position == other.layernorm_position &&
           residual_position == other.residual_position;
  }
  // Re-encodes this pair back into the CNNL enum; defined out-of-line below,
  // after `to` is available.
  constexpr operator cnnlTransformerLayernormResidualStructure_t() const;
};
// Position codes shared by both Helper fields.
constexpr int NO = 0;
constexpr int PRE = 1;      // layernorm before the op
constexpr int POST = 2;     // layernorm after the op
constexpr int CONTAIN = 1;  // layernorm inside the residual branch
constexpr int EXCLUDE = 2;  // layernorm outside the residual branch
using TPair = std::pair<Helper, LnresEnum>;
// Exhaustive (layernorm, residual) -> CNNL enum mapping. Note that
// {NO, CONTAIN} and {NO, EXCLUDE} collapse onto the same enum value, so
// from(to(h)) is not the identity for every Helper; `from` returns the first
// matching row.
constexpr std::array<TPair, 9> pairs = {
    TPair{{NO, NO}, CNNL_TRANSFORMER_NO_LAYERNORM_NO_RESIDUAL},  // noResidual
    {{NO, CONTAIN}, CNNL_TRANSFORMER_NO_LAYERNORM_WITH_RESIDUAL},  // useInputAsResidual
    {{NO, EXCLUDE}, CNNL_TRANSFORMER_NO_LAYERNORM_WITH_RESIDUAL},  // useInputAsResidual
    {{PRE, NO}, CNNL_TRANSFORMER_PRE_LAYERNORM_NO_RESIDUAL},  // noResidual
    {{PRE, CONTAIN}, CNNL_TRANSFORMER_PRE_LAYERNORM_INSIDE_RESIDUAL},  // useInputAsResidual
    // residualThenLayernorm
    {{PRE, EXCLUDE}, CNNL_TRANSFORMER_PRE_LAYERNORM_OUTSIDE_RESIDUAL},  // useLayernormAsResidual
    // residualThenLayernorm
    {{POST, NO}, CNNL_TRANSFORMER_POST_LAYERNORM_NO_RESIDUAL},  // noResidual
    {{POST, CONTAIN}, CNNL_TRANSFORMER_POST_LAYERNORM_INSIDE_RESIDUAL},  // useInputAsResidual
    // layernormThenResidual
    {{POST, EXCLUDE}, CNNL_TRANSFORMER_POST_LAYERNORM_OUTSIDE_RESIDUAL},  // useInputAsResidual
    // residualThenLayernorm
};
// Decodes a CNNL layernorm/residual enum into its Helper pair by scanning the
// `pairs` table. Unrecognized enum values fall back to {NO, NO} rather than
// throwing (throwing is not usable here in a constexpr context; see the
// retained note below).
constexpr Helper from(LnresEnum mode) {
  for (const auto &entry : pairs) {
    if (entry.second == mode) {
      return entry.first;
    }
  }
  // throw TmoException("Invalid cnnlTransformerLayernormResidualStructure_t");
  return Helper(NO, NO);
}
// Encodes a Helper pair back into the CNNL enum by scanning the `pairs`
// table. Unmatched pairs fall back to the "no layernorm, no residual" value
// rather than throwing (see the retained note below).
constexpr LnresEnum to(Helper mode) {
  for (const auto &entry : pairs) {
    if (entry.first == mode) {
      return entry.second;
    }
  }
  return CNNL_TRANSFORMER_NO_LAYERNORM_NO_RESIDUAL;
  // throw TmoException("Invalid Helper");
}
// Out-of-line definitions: the enum<->Helper conversions need `from`/`to`,
// which in turn need the complete `pairs` table, hence the deferred bodies.
constexpr Helper::Helper(LnresEnum mode) : Helper(from(mode)) {}
constexpr Helper::operator LnresEnum() const {
  return to(*this);
}
} // namespace internal
using namespace internal;
// Builds the CNNL layernorm/residual enum from three feature flags.
// A layernorm, when present, is always placed as a pre layernorm; the
// residual either reuses the raw input (CONTAIN) or the layernorm output
// (EXCLUDE).
inline LnresEnum makeLnresEnum(bool has_ln, bool has_residual, bool residual_is_input) {
  const int ln_pos = has_ln ? PRE : NO;
  int res_pos = NO;
  if (has_residual) {
    res_pos = residual_is_input ? CONTAIN : EXCLUDE;
  }
  return Helper(ln_pos, res_pos);
}
// Returns `mode` with any residual connection stripped; the layernorm
// placement is preserved.
inline LnresEnum removeResidual(LnresEnum mode) {
  const Helper decoded(mode);
  return Helper(decoded.layernorm_position, NO);
}
// Returns `mode` with any layernorm stripped; the residual placement is
// preserved.
inline LnresEnum removeLayernorm(LnresEnum mode) {
  const Helper decoded(mode);
  return Helper(NO, decoded.residual_position);
}
// True iff the layernorm output (not the raw input) feeds the residual add:
// pre layernorm with the layernorm outside the residual branch.
inline bool useLayernormAsResidual(LnresEnum mode) {
  const Helper decoded(mode);
  const bool pre_ln = (decoded.layernorm_position == PRE);
  const bool ln_outside = (decoded.residual_position == EXCLUDE);
  return pre_ln && ln_outside;
}
// True iff the raw layer input feeds the residual add. This holds when the
// layernorm sits inside the residual branch (CONTAIN), when there is a
// residual but no layernorm at all, or for post layernorm outside the
// residual.
inline bool useInputAsResidual(LnresEnum mode) {
  const Helper decoded(mode);
  if (decoded.residual_position == CONTAIN) {
    return true;
  }
  if (decoded.layernorm_position == NO && decoded.residual_position != NO) {
    return true;
  }
  return decoded.layernorm_position == POST && decoded.residual_position == EXCLUDE;
}
// True iff `mode` includes a residual connection of any kind.
inline bool hasResidual(LnresEnum mode) {
  return Helper(mode).residual_position != NO;
}
// True iff `mode` includes a layernorm (pre or post).
inline bool hasLayernorm(LnresEnum mode) {
  return Helper(mode).layernorm_position != NO;
}
// True iff the layernorm is applied after the op (post layernorm).
inline bool isPostLayernorm(LnresEnum mode) {
  return Helper(mode).layernorm_position == POST;
}
// True iff the layernorm is applied before the op (pre layernorm).
inline bool isPreLayernorm(LnresEnum mode) {
  return Helper(mode).layernorm_position == PRE;
}
// True when, at the boundary between two consecutive layers, the residual is
// added first and a layernorm follows: either layer 1 carries a residual
// (without a post layernorm) and layer 2 opens with a pre layernorm, or
// layer 1 ends with a post layernorm placed outside its residual add.
// Throws TmoException when layer 1's post layernorm would meet layer 2's
// pre layernorm — two layernorms back to back.
inline bool residualThenLayernorm(LnresEnum first_layer, LnresEnum second_layer) {
  Helper h1(first_layer);
  Helper h2(second_layer);
  if (h1.residual_position == NO) {  // layer 1 has no residual to place
    return false;
  }
  if (h1.layernorm_position == POST && h2.layernorm_position == PRE) {
    throw TmoException("too many layernorms");
  }
  // First clause: layer-1 residual followed by layer-2 pre layernorm.
  // (h1.residual_position != NO is already guaranteed by the early return.)
  return (h1.residual_position != NO && h1.layernorm_position != POST &&
          h2.layernorm_position == PRE) ||  // l1 residual + l2 pre layernorm
         // Second clause: layer-1 post layernorm outside its residual.
         // NOTE(review): the original trailing comment said "inside residual"
         // but the condition checks EXCLUDE (outside) — verify intent.
         (h1.layernorm_position == POST && h2.layernorm_position != PRE &&
          h1.residual_position == EXCLUDE);
}
// True when layer 1 applies its post layernorm before the residual add, i.e.
// the layernorm sits inside the residual branch (POST + CONTAIN).
// Throws TmoException on a post(layer 1)/pre(layer 2) layernorm collision,
// mirroring residualThenLayernorm; `second_layer` is decoded only for that
// validation.
inline bool layernormThenResidual(LnresEnum first_layer, LnresEnum second_layer) {
  Helper h1(first_layer);
  Helper h2(second_layer);
  if (h1.residual_position == NO) {  // layer 1 has no residual
    return false;
  }
  if (h1.layernorm_position == POST && h2.layernorm_position == PRE) {
    throw TmoException("too many layernorms");
  }
  return (h1.layernorm_position == POST && h1.residual_position == CONTAIN);
}
// True when the layer-1/layer-2 boundary contains a residual add but no
// layernorm on either side of it: layer 1 has a residual, no post layernorm
// on layer 1, and no pre layernorm on layer 2. Unlike the sibling predicates,
// this one performs no "too many layernorms" validation.
inline bool residualOnly(LnresEnum first_layer, LnresEnum second_layer) {
  const Helper first(first_layer);
  const Helper second(second_layer);
  if (first.residual_position == NO) {
    return false;
  }
  return first.layernorm_position != POST && second.layernorm_position != PRE;
}
} // namespace lnres
} // namespace tmo
#ifndef CNNL_CHECK
// Logs a failed CNNL status check to stderr; does not abort or throw.
//
// Fixes: `expr` is now parenthesized before `!=` (an argument such as
// `a & b` previously parsed as `a & (b != CNNL_STATUS_SUCCESS)`), and the
// bare `if` is wrapped in do/while(0) to avoid the dangling-else hazard.
#define CNNL_CHECK(expr)                                                     \
  do {                                                                       \
    if ((expr) != CNNL_STATUS_SUCCESS) {                                     \
      std::cerr << __FILE__ << ":" << __LINE__                               \
                << " Check failed: " #expr " == CNNL_STATUS_SUCCESS. "       \
                << std::endl;                                                \
    }                                                                        \
  } while (0)
#endif
// Fails hard on a CNNL error: logs the failing expression to stderr and
// throws TmoException.
//
// Fix: the bare `if` is wrapped in do/while(0) so the macro behaves as a
// single statement (no dangling-else hazard).
#define CNNL_CHECK_FATAL(expr)                                               \
  do {                                                                       \
    if ((expr) != CNNL_STATUS_SUCCESS) {                                     \
      std::cerr << __FILE__ << ":" << __LINE__ << ": "                       \
                << " Check failed: " #expr " == CNNL_STATUS_SUCCESS. "       \
                << std::endl;                                                \
      throw TmoException("Check failed: " #expr " == CNNL_STATUS_SUCCESS."); \
    }                                                                        \
  } while (0)
// Fails hard on a tmo kernel error: logs the failing expression to stderr
// and throws TmoException.
//
// Fix: the bare `if` is wrapped in do/while(0) so the macro behaves as a
// single statement (no dangling-else hazard).
#define TMO_KERNEL_CHECK_FATAL(expr)                                                         \
  do {                                                                                       \
    if ((expr) != tmo::KernelStatus::KERNEL_STATUS_SUCCESS) {                                \
      std::cerr << __FILE__ << ":" << __LINE__ << ": "                                       \
                << " Check failed: " #expr " == KernelStatus::KERNEL_STATUS_SUCCESS. "       \
                << std::endl;                                                                \
      throw TmoException("Check failed: " #expr " == KernelStatus::KERNEL_STATUS_SUCCESS."); \
    }                                                                                        \
  } while (0)
// General assertion macro: if `expr` is false, logs the expression plus a
// stringized message built from the variadic arguments, then throws
// TmoException.
//
// Fix: the bare `if` is wrapped in do/while(0) so the macro behaves as a
// single statement (no dangling-else hazard).
#define CHECK_FATAL(expr, ...)                                                     \
  do {                                                                             \
    if (!(expr)) {                                                                 \
      std::cerr << __FILE__ << ":" << __LINE__ << ": "                             \
                << " Check failed: " #expr ". " << tmo::stringize(__VA_ARGS__)     \
                << std::endl;                                                      \
      throw TmoException("Check failed: " #expr ". " + tmo::stringize(__VA_ARGS__)); \
    }                                                                              \
  } while (0)
#undef CNRT_CHECK
// Checks a CNRT call: on a nonzero status, prints file/line, error code,
// error string, and the failing expression, then throws TmoException.
//
// Fixes: the macro argument is parenthesized at its use site, and the local
// no longer uses `__ret` — identifiers containing a double underscore are
// reserved for the implementation.
#define CNRT_CHECK(val)                                                        \
  do {                                                                         \
    cnrtRet_t ret_ = (val);                                                    \
    if (ret_) {                                                                \
      printf("[%s:%d] CNRT error, code=%d(%s) \"%s\" \n", __FILE__, __LINE__,  \
             (unsigned int)ret_, cnrtGetErrorStr(ret_), #val);                 \
      throw TmoException(cnrtGetErrorStr(ret_));                               \
    }                                                                          \
  } while (0)
// Checks a CN driver-API call: on a nonzero status, fetches the driver's
// error string, prints file/line, error code, error string, and the failing
// expression, then throws TmoException.
//
// Fixes: the macro argument is parenthesized at its use site, and the local
// no longer uses `__ret` — identifiers containing a double underscore are
// reserved for the implementation.
#define CN_CHECK(val)                                                          \
  do {                                                                         \
    CNresult ret_ = (val);                                                     \
    if (ret_) {                                                                \
      const char *cn_err_string = nullptr;                                     \
      cnGetErrorString(ret_, &cn_err_string);                                  \
      printf("[%s:%d] CN error, code=%d(%s) \"%s\" \n", __FILE__, __LINE__,    \
             (unsigned int)ret_, cn_err_string, #val);                         \
      throw TmoException(cn_err_string);                                       \
    }                                                                          \
  } while (0)
// Integer ceiling division: the number of y-sized chunks needed to cover x.
// Assumes positive y; with negative operands the rounding is not "up".
#define PAD_UP_DIV(x, y) (((x) + (y) - 1) / (y))
// Symbol visibility controls for the shared-library ABI surface (GCC/Clang).
#define TMO_EXPORT __attribute__((__visibility__("default")))
#define TMO_HIDDEN __attribute__((__visibility__("hidden")))
// Deletes all four copy/move special members, making CLASSNAME neither
// copyable nor movable. Intended for use inside the class definition.
#define DELETE_COPY_ASSIGN_CONSTRUCT(CLASSNAME) \
  CLASSNAME(const CLASSNAME &) = delete;        \
  CLASSNAME(CLASSNAME &&) = delete;             \
  CLASSNAME &operator=(const CLASSNAME &) = delete; \
  CLASSNAME &operator=(CLASSNAME &&) = delete;
// Defines implicit conversion operators to DESCNAME for both const and
// non-const objects, returning the wrapped DESCOBJECT handle.
// Note: the const overload deliberately strips const with const_cast so the
// returned handle type is DESCNAME (without const) even when the wrapper
// object is const — C APIs take the plain handle type.
#define CLASS_CAST_TYPE_OPERATOR_DEFINE(DESCNAME, DESCOBJECT) \
  inline operator DESCNAME() const {                          \
    return const_cast<DESCNAME>(DESCOBJECT);                  \
  }                                                           \
  inline operator DESCNAME() {                                \
    return DESCOBJECT;                                        \
  }
#endif // CSRC_COMMON_UTILS_H_