294 lines
12 KiB
C++
294 lines
12 KiB
C++
/*************************************************************************
|
|
* Copyright (C) [2023-2024] by Cambricon, Inc.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
|
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
|
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
|
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
|
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
|
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
*************************************************************************/
|
|
#ifndef CSRC_COMMON_UTILS_H_
|
|
#define CSRC_COMMON_UTILS_H_
|
|
|
|
#include <algorithm>
|
|
#include <cassert>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <exception>
|
|
#include <functional>
|
|
#include <future> // NOLINT
|
|
#include <initializer_list>
|
|
#include <iostream>
|
|
#include <map>
|
|
#include <memory>
|
|
#include <stdexcept>
|
|
#include <string>
|
|
#include <thread> // NOLINT
|
|
#include <tuple>
|
|
#include <vector>
|
|
#include "cn_api.h"
|
|
#include "cnnl.h"
|
|
#include "cnnl_extra.h"
|
|
#include "cnrt.h"
|
|
#include "stack_exception.h"
|
|
|
|
namespace tmo {
|
|
/**
 * @brief Translate a quantize-layout name into the matching CNNL enum value.
 *
 * Recognised names: "quantize_none", "quantize_per_tensor",
 * "quantize_per_channel", "quantize_per_token" and "quantize_group_wise".
 * Matching is exact and case-sensitive.
 *
 * @param param Name to look up.
 * @return The mapped cnnlQuantizeLayout_t, or a value-initialized
 *         cnnlQuantizeLayout_t when the name is not in the table.
 */
inline cnnlQuantizeLayout_t strToQuantizeLayout(std::string param) {
  // The table is const and queried with find(): std::map::operator[] would
  // insert a default entry for every unknown name, i.e. write to this shared
  // static from whatever thread happens to call us.
  static const std::map<std::string, cnnlQuantizeLayout_t> quantize_layout_map = {
      {"quantize_none", CNNL_QUANTIZE_NONE},
      {"quantize_per_tensor", CNNL_QUANTIZE_PER_TENSOR},
      {"quantize_per_channel", CNNL_QUANTIZE_PER_CHANNEL},
      {"quantize_per_token", CNNL_QUANTIZE_PER_TOKEN},
      {"quantize_group_wise", CNNL_QUANTIZE_GROUP_WISE}};
  const auto hit = quantize_layout_map.find(param);
  if (hit == quantize_layout_map.end()) {
    // Same value operator[] used to hand back for an unknown key.
    return cnnlQuantizeLayout_t{};
  }
  return hit->second;
}
|
|
|
|
/**
 * @brief Translate an activation name into the matching CNNL activation mode.
 *
 * Recognised names: "gelu", "relu", "sigmoid", "silu" (mapped to
 * CNNL_ACTIVATION_SWISH) and "none" (mapped to CNNL_ACTIVATION_IDENTITY).
 * Matching is exact and case-sensitive.
 *
 * @param param Name to look up.
 * @return The mapped cnnlActivationMode_t, or a value-initialized
 *         cnnlActivationMode_t when the name is not in the table.
 */
inline cnnlActivationMode_t strToActivationMode(std::string param) {
  // Const table + find(): operator[] would insert unknown names into this
  // shared static, mutating it on a read path.
  static const std::map<std::string, cnnlActivationMode_t> act_mode_map = {
      {"gelu", CNNL_ACTIVATION_GELU},
      {"relu", CNNL_ACTIVATION_RELU},
      {"sigmoid", CNNL_ACTIVATION_SIGMOID},
      {"silu", CNNL_ACTIVATION_SWISH},
      {"none", CNNL_ACTIVATION_IDENTITY}};
  const auto hit = act_mode_map.find(param);
  if (hit == act_mode_map.end()) {
    // Same value operator[] used to hand back for an unknown key.
    return cnnlActivationMode_t{};
  }
  return hit->second;
}
|
|
/**
 * @brief Translate a quantization-algorithm name into the CNNL enum value.
 *
 * Recognised names: "weight_only", "smooth_quant" and "none".
 * Matching is exact and case-sensitive.
 *
 * @param param Name to look up.
 * @return The mapped cnnlLLMQuantAlgo_t, or a value-initialized
 *         cnnlLLMQuantAlgo_t when the name is not in the table.
 */
inline cnnlLLMQuantAlgo_t strToQuantizeAlgo(std::string param) {
  // Const table + find(): operator[] would insert unknown names into this
  // shared static, mutating it on a read path.
  static const std::map<std::string, cnnlLLMQuantAlgo_t> quant_algo_map = {
      {"weight_only", CNNL_WEIGHT_ONLY},
      {"smooth_quant", CNNL_SMOOTH_QUANT},
      {"none", CNNL_NO_QUANT}};
  const auto hit = quant_algo_map.find(param);
  if (hit == quant_algo_map.end()) {
    // Same value operator[] used to hand back for an unknown key.
    return cnnlLLMQuantAlgo_t{};
  }
  return hit->second;
}
|
|
|
|
namespace lnres {
|
|
namespace internal {
|
|
// Short alias for the CNNL layernorm/residual structure enum used throughout this namespace.
using LnresEnum = cnnlTransformerLayernormResidualStructure_t;
|
|
struct Helper {
|
|
int layernorm_position; // 0: no layernorm, 1: pre layernorm, 2: post layernorm
|
|
int residual_position; // 0: no residual, 1: layernorm inside residual, 2: layernorm outside
|
|
// residual
|
|
constexpr Helper(cnnlTransformerLayernormResidualStructure_t mode);
|
|
|
|
constexpr Helper(int layernorm_position, int residual_position)
|
|
: layernorm_position(layernorm_position), residual_position(residual_position) {}
|
|
|
|
constexpr bool operator==(const Helper &other) const {
|
|
return layernorm_position == other.layernorm_position &&
|
|
residual_position == other.residual_position;
|
|
}
|
|
|
|
constexpr operator cnnlTransformerLayernormResidualStructure_t() const;
|
|
};
|
|
|
|
constexpr int NO = 0;
|
|
constexpr int PRE = 1;
|
|
constexpr int POST = 2;
|
|
constexpr int CONTAIN = 1;
|
|
constexpr int EXCLUDE = 2;
|
|
using TPair = std::pair<Helper, LnresEnum>;
|
|
constexpr std::array<TPair, 9> pairs = {
|
|
TPair{{NO, NO}, CNNL_TRANSFORMER_NO_LAYERNORM_NO_RESIDUAL}, // noResidual
|
|
{{NO, CONTAIN}, CNNL_TRANSFORMER_NO_LAYERNORM_WITH_RESIDUAL}, // useInputAsResidual
|
|
{{NO, EXCLUDE}, CNNL_TRANSFORMER_NO_LAYERNORM_WITH_RESIDUAL}, // useInputAsResidual
|
|
{{PRE, NO}, CNNL_TRANSFORMER_PRE_LAYERNORM_NO_RESIDUAL}, // noResidual
|
|
{{PRE, CONTAIN}, CNNL_TRANSFORMER_PRE_LAYERNORM_INSIDE_RESIDUAL}, // useInputAsResidual
|
|
// residualThenLayernorm
|
|
{{PRE, EXCLUDE}, CNNL_TRANSFORMER_PRE_LAYERNORM_OUTSIDE_RESIDUAL}, // useLayernormAsResidual
|
|
// residualThenLayernorm
|
|
{{POST, NO}, CNNL_TRANSFORMER_POST_LAYERNORM_NO_RESIDUAL}, // noResidual
|
|
{{POST, CONTAIN}, CNNL_TRANSFORMER_POST_LAYERNORM_INSIDE_RESIDUAL}, // useInputAsResidual
|
|
// layernormThenResidual
|
|
{{POST, EXCLUDE}, CNNL_TRANSFORMER_POST_LAYERNORM_OUTSIDE_RESIDUAL}, // useInputAsResidual
|
|
// residualThenLayernorm
|
|
};
|
|
|
|
// Enum -> Helper: returns the Helper of the first table row whose enum equals
// `mode`. A value that appears in no row silently yields Helper(NO, NO); the
// exception that would report it is intentionally left disabled below.
constexpr Helper from(LnresEnum mode) {
  size_t row = 0;
  while (row < pairs.size()) {
    if (pairs[row].second == mode) {
      return pairs[row].first;
    }
    ++row;
  }
  // throw TmoException("Invalid cnnlTransformerLayernormResidualStructure_t");
  return Helper(NO, NO);
}
|
|
|
|
// Helper -> enum: returns the enum of the first table row whose Helper equals
// `mode`. A combination found in no row silently yields
// CNNL_TRANSFORMER_NO_LAYERNORM_NO_RESIDUAL; the exception that would report
// it is intentionally left disabled below.
constexpr LnresEnum to(Helper mode) {
  size_t row = 0;
  while (row < pairs.size()) {
    if (pairs[row].first == mode) {
      return pairs[row].second;
    }
    ++row;
  }
  // throw TmoException("Invalid Helper");
  return CNNL_TRANSFORMER_NO_LAYERNORM_NO_RESIDUAL;
}
|
|
|
|
// Converting constructor: resolves `mode` through from() and copies the resulting positions.
// A mode that from() does not find therefore becomes (NO, NO).
constexpr Helper::Helper(LnresEnum mode) : Helper(from(mode)) {}
|
|
|
|
// Conversion back to the enum: delegates to the table lookup in to().
constexpr Helper::operator LnresEnum() const { return to(*this); }
|
|
} // namespace internal
|
|
|
|
// Makes Helper, LnresEnum and the position constants usable unqualified by the functions below.
// Note: as a using-directive at namespace scope it also makes them visible to lookups in tmo::lnres.
using namespace internal;
|
|
|
|
// Build the enum from three flags.
//   has_ln            -> PRE layernorm when true, otherwise no layernorm
//                        (this function never produces POST).
//   has_residual      -> when false the residual position is NO and
//                        residual_is_input is ignored.
//   residual_is_input -> CONTAIN when true, EXCLUDE when false.
inline LnresEnum makeLnresEnum(bool has_ln, bool has_residual, bool residual_is_input) {
  const int ln_pos = has_ln ? PRE : NO;
  int res_pos = NO;
  if (has_residual) {
    res_pos = residual_is_input ? CONTAIN : EXCLUDE;
  }
  return Helper(ln_pos, res_pos);
}
|
|
|
|
// Returns the enum with the same layernorm position as `mode` but no residual.
inline LnresEnum removeResidual(LnresEnum mode) {
  const Helper parts(mode);
  const Helper stripped(parts.layernorm_position, NO);
  return stripped;
}
|
|
|
|
// Returns the enum with the same residual position as `mode` but no layernorm.
inline LnresEnum removeLayernorm(LnresEnum mode) {
  const Helper parts(mode);
  const Helper stripped(NO, parts.residual_position);
  return stripped;
}
|
|
|
|
// True only for the PRE layernorm + EXCLUDE residual combination.
inline bool useLayernormAsResidual(LnresEnum mode) {
  const Helper parts(mode);
  if (parts.layernorm_position != PRE) {
    return false;
  }
  return parts.residual_position == EXCLUDE;
}
|
|
|
|
// True when the mode has a residual and it is not the PRE + EXCLUDE case:
//   - any CONTAIN residual,
//   - any residual when there is no layernorm,
//   - POST layernorm with an EXCLUDE residual.
inline bool useInputAsResidual(LnresEnum mode) {
  const Helper parts(mode);
  if (parts.residual_position == CONTAIN) {
    return true;
  }
  if (parts.residual_position == NO) {
    return false;
  }
  if (parts.layernorm_position == NO) {
    return true;
  }
  return parts.layernorm_position == POST && parts.residual_position == EXCLUDE;
}
|
|
|
|
// True when the mode's residual position is anything other than NO.
inline bool hasResidual(LnresEnum mode) {
  return !(Helper(mode).residual_position == NO);
}
|
|
|
|
// True when the mode's layernorm position is anything other than NO.
inline bool hasLayernorm(LnresEnum mode) {
  return !(Helper(mode).layernorm_position == NO);
}
|
|
|
|
// True when the mode's layernorm position is POST.
inline bool isPostLayernorm(LnresEnum mode) {
  const int ln_pos = Helper(mode).layernorm_position;
  return ln_pos == POST;
}
|
|
|
|
// True when the mode's layernorm position is PRE.
inline bool isPreLayernorm(LnresEnum mode) {
  const int ln_pos = Helper(mode).layernorm_position;
  return ln_pos == PRE;
}
|
|
|
|
// Decides, for two consecutive layers, whether the first layer's residual is
// followed by a layernorm.
// Returns false when the first layer has no residual. Otherwise throws
// TmoException("too many layernorms") when the first layer is POST layernorm
// and the second is PRE layernorm. Otherwise returns true when
//   - the second layer is PRE layernorm, or
//   - the first layer is POST layernorm with an EXCLUDE residual.
inline bool residualThenLayernorm(LnresEnum first_layer, LnresEnum second_layer) {
  const Helper first(first_layer);
  const Helper second(second_layer);
  if (first.residual_position == NO) {
    return false;
  }

  const bool first_is_post = first.layernorm_position == POST;
  const bool second_is_pre = second.layernorm_position == PRE;
  if (first_is_post && second_is_pre) {
    throw TmoException("too many layernorms");
  }

  if (second_is_pre) {
    // First layer has a residual and is not POST: residual, then the second
    // layer's pre layernorm.
    return true;
  }
  // Second layer is not PRE: only the first layer's own post layernorm with an
  // EXCLUDE residual qualifies.
  return first_is_post && first.residual_position == EXCLUDE;
}
|
|
|
|
// Decides, for two consecutive layers, whether the first layer applies its
// layernorm before its residual.
// Returns false when the first layer has no residual or is not POST layernorm.
// Throws TmoException("too many layernorms") when the first layer has a
// residual, is POST layernorm, and the second layer is PRE layernorm.
// Otherwise returns true only for a CONTAIN residual.
inline bool layernormThenResidual(LnresEnum first_layer, LnresEnum second_layer) {
  const Helper first(first_layer);
  const Helper second(second_layer);
  if (first.residual_position == NO) {
    return false;
  }
  if (first.layernorm_position != POST) {
    return false;
  }
  if (second.layernorm_position == PRE) {
    throw TmoException("too many layernorms");
  }
  return first.residual_position == CONTAIN;
}
|
|
|
|
// True when the first layer has a residual and no layernorm follows it: the
// first layer is not POST layernorm and the second layer is not PRE layernorm.
// Never throws, unlike the two sibling predicates.
inline bool residualOnly(LnresEnum first_layer, LnresEnum second_layer) {
  const Helper first(first_layer);
  const Helper second(second_layer);
  if (first.residual_position == NO) {
    return false;
  }
  if (first.layernorm_position == POST) {
    return false;
  }
  return second.layernorm_position != PRE;
}
|
|
} // namespace lnres
|
|
} // namespace tmo
|
|
|
|
// Non-fatal CNNL status check: evaluates `expr` once and, when the result is
// not CNNL_STATUS_SUCCESS, logs file, line and the expression text to
// std::cerr. Execution continues either way.
// `expr` is parenthesized so operators inside it cannot bind to the `!=`, and
// the body is wrapped in do/while(0) so the macro behaves as one statement
// (safe in an unbraced if/else). Use it with a trailing semicolon.
// Only defined when no CNNL_CHECK exists yet.
#ifndef CNNL_CHECK
#define CNNL_CHECK(expr)                                                             \
  do {                                                                               \
    if ((expr) != CNNL_STATUS_SUCCESS) {                                             \
      std::cerr << __FILE__ << ":" << __LINE__                                       \
                << " Check failed: " #expr " == CNNL_STATUS_SUCCESS. " << std::endl; \
    }                                                                                \
  } while (0)
#endif
|
|
|
|
// Fatal CNNL status check: evaluates `expr` once and, when the result is not
// CNNL_STATUS_SUCCESS, logs file, line and the expression text to std::cerr
// and throws TmoException carrying the same "Check failed" text.
// Wrapped in do/while(0) so the macro behaves as one statement (safe in an
// unbraced if/else). Use it with a trailing semicolon.
#define CNNL_CHECK_FATAL(expr)                                                       \
  do {                                                                               \
    if ((expr) != CNNL_STATUS_SUCCESS) {                                             \
      std::cerr << __FILE__ << ":" << __LINE__ << ": "                               \
                << " Check failed: " #expr " == CNNL_STATUS_SUCCESS. " << std::endl; \
      throw TmoException("Check failed: " #expr " == CNNL_STATUS_SUCCESS.");         \
    }                                                                                \
  } while (0)
|
|
|
|
// Fatal kernel status check: evaluates `expr` once and, when the result is not
// tmo::KernelStatus::KERNEL_STATUS_SUCCESS, logs file, line and the expression
// text to std::cerr and throws TmoException carrying the same text.
// Wrapped in do/while(0) so the macro behaves as one statement (safe in an
// unbraced if/else). Use it with a trailing semicolon.
#define TMO_KERNEL_CHECK_FATAL(expr)                                                       \
  do {                                                                                     \
    if ((expr) != tmo::KernelStatus::KERNEL_STATUS_SUCCESS) {                              \
      std::cerr << __FILE__ << ":" << __LINE__ << ": "                                     \
                << " Check failed: " #expr " == KernelStatus::KERNEL_STATUS_SUCCESS. "     \
                << std::endl;                                                              \
      throw TmoException("Check failed: " #expr " == KernelStatus::KERNEL_STATUS_SUCCESS."); \
    }                                                                                      \
  } while (0)
|
|
|
|
// Fatal generic check: when `expr` is false, logs file, line, the expression
// text and the extra arguments (formatted by tmo::stringize) to std::cerr,
// then throws TmoException with the same message.
// The extra arguments are formatted once and reused for both the log line and
// the exception, so they are not evaluated twice. Wrapped in do/while(0) so
// the macro behaves as one statement; use it with a trailing semicolon.
#define CHECK_FATAL(expr, ...)                                                            \
  do {                                                                                    \
    if (!(expr)) {                                                                        \
      const auto tmo_check_fatal_detail_ = tmo::stringize(__VA_ARGS__);                   \
      std::cerr << __FILE__ << ":" << __LINE__ << ": "                                    \
                << " Check failed: " #expr ". " << tmo_check_fatal_detail_ << std::endl;  \
      throw TmoException("Check failed: " #expr ". " + tmo_check_fatal_detail_);          \
    }                                                                                     \
  } while (0)
|
|
|
|
// CNRT status check: replaces any earlier CNRT_CHECK definition. Evaluates
// `val` once; on a non-zero result prints file, line, numeric code, the
// cnrtGetErrorStr() text and the expression text to stdout, then throws
// TmoException with the error text.
// The local uses a non-reserved name (identifiers containing a double
// underscore are reserved), the code is cast to int to match the %d
// conversion, and the error text is fetched once.
#undef CNRT_CHECK
#define CNRT_CHECK(val)                                                            \
  do {                                                                             \
    const cnrtRet_t tmo_cnrt_ret_ = (val);                                         \
    if (tmo_cnrt_ret_) {                                                           \
      const char *tmo_cnrt_msg_ = cnrtGetErrorStr(tmo_cnrt_ret_);                  \
      printf("[%s:%d] CNRT error, code=%d(%s) \"%s\" \n", __FILE__, __LINE__,      \
             static_cast<int>(tmo_cnrt_ret_), tmo_cnrt_msg_, #val);                \
      throw TmoException(tmo_cnrt_msg_);                                           \
    }                                                                              \
  } while (0)
|
|
|
|
// CN driver status check: evaluates `val` once; on a non-zero result asks
// cnGetErrorString() for a description, prints file, line, numeric code, the
// description and the expression text to stdout, then throws TmoException
// with the description.
// If cnGetErrorString() leaves the pointer null, a fixed fallback text is used
// so neither printf("%s") nor the exception receives a null pointer.
// The result local uses a non-reserved name and the code is cast to int to
// match the %d conversion.
#define CN_CHECK(val)                                                              \
  do {                                                                             \
    const CNresult tmo_cn_ret_ = (val);                                            \
    if (tmo_cn_ret_) {                                                             \
      const char *cn_err_string = nullptr;                                         \
      cnGetErrorString(tmo_cn_ret_, &cn_err_string);                               \
      if (cn_err_string == nullptr) {                                              \
        cn_err_string = "unknown CN error";                                        \
      }                                                                            \
      printf("[%s:%d] CN error, code=%d(%s) \"%s\" \n", __FILE__, __LINE__,        \
             static_cast<int>(tmo_cn_ret_), cn_err_string, #val);                  \
      throw TmoException(cn_err_string);                                           \
    }                                                                              \
  } while (0)
|
|
|
|
// Integer division rounded up: (x + y - 1) / y. Exact for non-negative x and positive y.
// `y` appears twice in the expansion, so avoid arguments with side effects.
#define PAD_UP_DIV(x, y) (((x) + (y) - 1) / (y))
|
|
|
|
// Marks a declaration with "default" symbol visibility (GCC/Clang attribute syntax).
#define TMO_EXPORT __attribute__((__visibility__("default")))
|
|
// Marks a declaration with "hidden" symbol visibility (GCC/Clang attribute syntax).
#define TMO_HIDDEN __attribute__((__visibility__("hidden")))
|
|
|
|
// Place inside a class body to delete all four copy/move operations of
// CLASSNAME (copy constructor, copy assignment, move constructor, move
// assignment). The expansion already ends with a semicolon.
#define DELETE_COPY_ASSIGN_CONSTRUCT(CLASSNAME)      \
  CLASSNAME(const CLASSNAME &) = delete;             \
  CLASSNAME &operator=(const CLASSNAME &) = delete;  \
  CLASSNAME(CLASSNAME &&) = delete;                  \
  CLASSNAME &operator=(CLASSNAME &&) = delete;
|
|
|
|
// Place inside a class body to add implicit conversions to DESCNAME that
// return the member DESCOBJECT.
// Note: the const overload strips constness with const_cast, so a const
// object still converts to the plain (non-const) DESCNAME.
#define CLASS_CAST_TYPE_OPERATOR_DEFINE(DESCNAME, DESCOBJECT)                    \
  operator DESCNAME() { return DESCOBJECT; }                                     \
  operator DESCNAME() const { return const_cast<DESCNAME>(DESCOBJECT); }
|
|
|
|
#endif // CSRC_COMMON_UTILS_H_
|