sync from b7516
This commit is contained in:
@@ -402,20 +402,12 @@ static std::string var_to_str(ggml_op_pool pool) {
|
||||
}
|
||||
|
||||
static std::string var_to_str(ggml_scale_mode mode) {
|
||||
std::string str;
|
||||
switch (mode & 0xFF) {
|
||||
case GGML_SCALE_MODE_NEAREST: str = "nearest"; break;
|
||||
case GGML_SCALE_MODE_BILINEAR: str = "bilinear"; break;
|
||||
case GGML_SCALE_MODE_BICUBIC: str = "bicubic"; break;
|
||||
default: str = std::to_string(mode); break;
|
||||
switch (mode) {
|
||||
case GGML_SCALE_MODE_NEAREST: return "nearest";
|
||||
case GGML_SCALE_MODE_BILINEAR: return "bilinear";
|
||||
case GGML_SCALE_MODE_BICUBIC: return "bicubic";
|
||||
default: return std::to_string(mode);
|
||||
}
|
||||
if (mode & GGML_SCALE_FLAG_ALIGN_CORNERS) {
|
||||
str += "|align_corners";
|
||||
}
|
||||
if (mode & GGML_SCALE_FLAG_ANTIALIAS) {
|
||||
str += "|antialias";
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
#define VAR_TO_STR(x) (#x "=" + var_to_str(x))
|
||||
@@ -454,28 +446,6 @@ static bool ggml_is_view_op(enum ggml_op op) {
|
||||
return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE;
|
||||
}
|
||||
|
||||
static bool backend_has_feature(ggml_backend_t backend, const char * feature_name) {
|
||||
ggml_backend_dev_t dev = ggml_backend_get_device(backend);
|
||||
ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
|
||||
|
||||
auto get_features = (ggml_backend_get_features_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
|
||||
if (!get_features) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const ggml_backend_feature * features = get_features(reg);
|
||||
if (!features) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (const ggml_backend_feature * f = features; f->name; ++f) {
|
||||
if (strcmp(f->name, feature_name) == 0 && strcmp(f->value, "1") == 0) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
enum test_mode {
|
||||
MODE_TEST,
|
||||
MODE_PERF,
|
||||
@@ -1123,11 +1093,6 @@ struct test_case {
|
||||
return 1e-7;
|
||||
}
|
||||
|
||||
virtual double max_nmse_err(ggml_backend_t backend) {
|
||||
GGML_UNUSED(backend);
|
||||
return max_nmse_err();
|
||||
}
|
||||
|
||||
virtual double max_maa_err() {
|
||||
return 1e-4;
|
||||
}
|
||||
@@ -1136,10 +1101,6 @@ struct test_case {
|
||||
return max_nmse_err();
|
||||
}
|
||||
|
||||
virtual double max_err(ggml_backend_t backend) {
|
||||
return max_nmse_err(backend);
|
||||
}
|
||||
|
||||
virtual double err(const float * a, const float * b, size_t n) {
|
||||
return nmse(a, b, n);
|
||||
}
|
||||
@@ -1189,7 +1150,6 @@ struct test_case {
|
||||
}
|
||||
|
||||
virtual bool run_whole_graph() { return false; }
|
||||
virtual std::vector<ggml_tensor *> fusion_test_nodes() { return {}; }
|
||||
|
||||
ggml_cgraph * gf = nullptr;
|
||||
ggml_cgraph * gb = nullptr;
|
||||
@@ -1409,8 +1369,8 @@ struct test_case {
|
||||
}
|
||||
|
||||
double err = ud->tc->err(f1.data(), f2.data(), f1.size());
|
||||
if (err > ud->tc->max_err(ud->backend1)) {
|
||||
printf("[%s] ERR = %.9f > %.9f ", ggml_op_desc(t1), err, ud->tc->max_err(ud->backend1));
|
||||
if (err > ud->tc->max_err()) {
|
||||
printf("[%s] ERR = %.9f > %.9f ", ggml_op_desc(t1), err, ud->tc->max_err());
|
||||
//for (int i = 0; i < (int) f1.size(); i++) {
|
||||
// printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
|
||||
//}
|
||||
@@ -1423,13 +1383,7 @@ struct test_case {
|
||||
GGML_UNUSED(index);
|
||||
};
|
||||
|
||||
std::vector<ggml_tensor *> fused_nodes_to_verify = fusion_test_nodes();
|
||||
if (fused_nodes_to_verify.size() == 0 && run_whole_graph()) {
|
||||
fused_nodes_to_verify.push_back(out);
|
||||
}
|
||||
const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud,
|
||||
run_whole_graph() ? fused_nodes_to_verify.data() : nullptr,
|
||||
fused_nodes_to_verify.size());
|
||||
const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud, run_whole_graph() ? out : nullptr);
|
||||
|
||||
ggml_backend_buffer_free(buf);
|
||||
|
||||
@@ -3462,65 +3416,6 @@ struct test_rms_norm_mul_add : public test_case {
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_ADD + GGML_OP_RMS_NORM (fused operation)
|
||||
struct test_add_rms_norm : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
const float eps;
|
||||
const bool broadcast;
|
||||
|
||||
std::string op_desc(ggml_tensor * t) override {
|
||||
GGML_UNUSED(t);
|
||||
return "ADD_RMS_NORM";
|
||||
}
|
||||
|
||||
bool run_whole_graph() override { return true; }
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR4(type, ne, eps, broadcast);
|
||||
}
|
||||
|
||||
test_add_rms_norm(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = {64, 5, 4, 3},
|
||||
float eps = 1e-6f, bool broadcast = false)
|
||||
: type(type), ne(ne), eps(eps), broadcast(broadcast) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
std::array<int64_t, 4> broadcast_dims = {ne[0]*2, ne[1]*3, ne[2]*3, ne[3]*4};
|
||||
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, broadcast ? broadcast_dims.data() : ne.data());
|
||||
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
|
||||
ggml_set_param(a);
|
||||
ggml_set_name(a, "a");
|
||||
ggml_set_param(b);
|
||||
ggml_set_name(b, "b");
|
||||
|
||||
// ADD operation followed by RMS_NORM
|
||||
ggml_tensor * add_result = ggml_add(ctx, a, b);
|
||||
ggml_set_name(add_result, "add_result");
|
||||
|
||||
ggml_tensor * out = ggml_rms_norm(ctx, add_result, eps);
|
||||
ggml_set_name(out, "out");
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
void initialize_tensors(ggml_context * ctx) override {
|
||||
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
|
||||
init_tensor_uniform(t, -10.f, 10.f);
|
||||
}
|
||||
}
|
||||
|
||||
float grad_eps() override {
|
||||
return 1.0f;
|
||||
}
|
||||
|
||||
bool grad_precise() override {
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_SSM_CONV
|
||||
struct test_ssm_conv : public test_case {
|
||||
const ggml_type type;
|
||||
@@ -3717,14 +3612,6 @@ struct test_mul_mat : public test_case {
|
||||
return 5e-4;
|
||||
}
|
||||
|
||||
double max_nmse_err(ggml_backend_t backend) override {
|
||||
// for blackwell we quantize activations to mxfp4 instead of q8_1 so we add higher tolerance
|
||||
if (type_a == GGML_TYPE_MXFP4 && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
|
||||
return 2e-2;
|
||||
}
|
||||
return max_nmse_err();
|
||||
}
|
||||
|
||||
int64_t grad_nmax() override {
|
||||
return 20000;
|
||||
}
|
||||
@@ -3853,14 +3740,6 @@ struct test_mul_mat_id : public test_case {
|
||||
return 5e-4;
|
||||
}
|
||||
|
||||
double max_nmse_err(ggml_backend_t backend) override {
|
||||
// for blackwell we quantize activations to mxfp4 instead of q8_1 so we add higher tolerance
|
||||
if (type_a == GGML_TYPE_MXFP4 && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
|
||||
return 2e-2;
|
||||
}
|
||||
return max_nmse_err();
|
||||
}
|
||||
|
||||
uint64_t op_flops(ggml_tensor * t) override {
|
||||
GGML_UNUSED(t);
|
||||
return 2 * m * k * n * n_used;
|
||||
@@ -5293,8 +5172,6 @@ struct test_topk_moe : public test_case {
|
||||
const bool bias_probs;
|
||||
const MoeGatingFunc gating_func;
|
||||
const float scale_w;
|
||||
ggml_tensor * weights {};
|
||||
ggml_tensor * selected_experts {};
|
||||
|
||||
test_topk_moe(std::array<int64_t, 4> ne = { 10, 5, 1, 1 },
|
||||
int n_expert_used = 1,
|
||||
@@ -5332,16 +5209,16 @@ struct test_topk_moe : public test_case {
|
||||
|
||||
ggml_tensor * selection_probs = probs;
|
||||
if (bias_probs) {
|
||||
ggml_tensor * exp_probs_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ne[0]);
|
||||
ggml_tensor * exp_probs_b = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne.data());
|
||||
ggml_set_name(exp_probs_b, "exp_probs_b");
|
||||
selection_probs = ggml_add(ctx, probs, exp_probs_b);
|
||||
ggml_set_name(selection_probs, "selection_probs");
|
||||
}
|
||||
|
||||
selected_experts = ggml_argsort_top_k(ctx, selection_probs, n_expert_used); // [n_expert_used, n_tokens]
|
||||
ggml_tensor * selected_experts = ggml_argsort_top_k(ctx, selection_probs, n_expert_used); // [n_expert_used, n_tokens]
|
||||
ggml_set_name(selected_experts, "selected_experts");
|
||||
|
||||
weights = ggml_get_rows(ctx, ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
|
||||
ggml_tensor * weights = ggml_get_rows(ctx, ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
|
||||
ggml_set_name(weights, "weights");
|
||||
|
||||
if (gating_func == GATING_FUNC_SOFTMAX_WEIGHT) {
|
||||
@@ -5367,21 +5244,6 @@ struct test_topk_moe : public test_case {
|
||||
ggml_set_name(weights, "weights");
|
||||
return weights;
|
||||
}
|
||||
// Verify two outputs
|
||||
std::vector<ggml_tensor *> fusion_test_nodes() override { return { selected_experts, weights }; }
|
||||
|
||||
// allow output in arbitrary order
|
||||
double err(const float * a, const float * b, size_t n) override {
|
||||
std::vector<float> a2(n);
|
||||
std::vector<float> b2(n);
|
||||
for (size_t i = 0; i < n; ++i) {
|
||||
a2[i] = a[i];
|
||||
b2[i] = b[i];
|
||||
}
|
||||
std::sort(a2.begin(), a2.end());
|
||||
std::sort(b2.begin(), b2.end());
|
||||
return nmse(a2.data(), b2.data(), n);
|
||||
}
|
||||
};
|
||||
|
||||
struct test_mul_mat_vec_fusion : public test_case {
|
||||
@@ -5673,16 +5535,18 @@ struct test_interpolate : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
const std::array<int64_t, 4> ne_tgt;
|
||||
const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
|
||||
const uint32_t mode = GGML_SCALE_MODE_NEAREST;
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR4(type, ne, ne_tgt, mode);
|
||||
ggml_scale_mode mode = (ggml_scale_mode)(this->mode & 0xFF);
|
||||
std::string flags = (this->mode & GGML_SCALE_FLAG_ALIGN_CORNERS) ? "align_corners" : "none";
|
||||
return VARS_TO_STR5(type, ne, ne_tgt, mode, flags);
|
||||
}
|
||||
|
||||
test_interpolate(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = {2, 5, 7, 11},
|
||||
std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
|
||||
ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
|
||||
uint32_t mode = GGML_SCALE_MODE_NEAREST)
|
||||
: type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
@@ -7482,35 +7346,28 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_softcap(GGML_TYPE_F32, {10, 10, 10, 10}, 50.0f));
|
||||
test_cases.emplace_back(new test_silu_back());
|
||||
|
||||
for (float eps : { 0.0f, 1e-6f, 1e-4f, 1e-1f }) {
|
||||
for (uint32_t n : { 64, 1025 }) {
|
||||
for (bool v : { false, true }) {
|
||||
test_cases.emplace_back(new test_norm(GGML_TYPE_F32, { n, 5, 4, 3 }, v, eps));
|
||||
test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, { n, 5, 4, 3 }, v, eps));
|
||||
}
|
||||
test_cases.emplace_back(new test_rms_norm_back(GGML_TYPE_F32, { n, 5, 4, 3 }, eps));
|
||||
test_cases.emplace_back(new test_l2_norm(GGML_TYPE_F32, { n, 5, 4, 3 }, eps));
|
||||
for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f}) {
|
||||
for (bool v : {false, true}) {
|
||||
test_cases.emplace_back(new test_norm (GGML_TYPE_F32, {64, 5, 4, 3}, v, eps));
|
||||
test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 5, 4, 3}, v, eps));
|
||||
}
|
||||
test_cases.emplace_back(new test_rms_norm_back(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
|
||||
test_cases.emplace_back(new test_l2_norm (GGML_TYPE_F32, {64, 5, 4, 3}, eps));
|
||||
}
|
||||
|
||||
// in-place tests
|
||||
test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 5, 4, 3}, false, 1e-6f, true));
|
||||
|
||||
for (float eps : { 0.0f, 1e-6f, 1e-4f, 1e-1f, 1.0f }) {
|
||||
for (uint32_t n : { 64, 1025 }) {
|
||||
test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, false));
|
||||
test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, true));
|
||||
test_cases.emplace_back(new test_norm_mul_add(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, false));
|
||||
test_cases.emplace_back(new test_norm_mul_add(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, true));
|
||||
test_cases.emplace_back(new test_add_rms_norm(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, false));
|
||||
test_cases.emplace_back(new test_add_rms_norm(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, true));
|
||||
}
|
||||
for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f, 1.0f}) {
|
||||
test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps, false));
|
||||
test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps, true));
|
||||
test_cases.emplace_back(new test_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps, false));
|
||||
test_cases.emplace_back(new test_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps, true));
|
||||
}
|
||||
for (uint32_t n : {1, 511, 1025, 8192, 33*512}) {
|
||||
for (bool multi_add : {false, true}) {
|
||||
test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, {n, 1, 1, 1}, 1e-6f, false, multi_add));
|
||||
}
|
||||
test_cases.emplace_back(new test_add_rms_norm(GGML_TYPE_F32, {n, 1, 1, 1}, 1e-6f, false));
|
||||
}
|
||||
|
||||
for (auto multi_add : {false, true}) {
|
||||
@@ -7528,6 +7385,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_l2_norm(GGML_TYPE_F32, {64, 5, 4, 3}, 1e-12f));
|
||||
|
||||
for (int64_t d_conv : {3, 4, 9}) {
|
||||
for (int64_t d_inner: {1024, 1536, 2048}) {
|
||||
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {d_conv, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}));
|
||||
@@ -7561,11 +7421,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 1700000, 96, 2592, {1, 1}, {1, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 1700000, 3, 2592, {1, 1}, {1, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 1700000, 1, 2592, {1, 1}, {1, 1}));
|
||||
|
||||
test_cases.emplace_back(new test_mul_mat_id(GGML_TYPE_Q8_0, GGML_TYPE_F32, 128, 128, false, 8192, 2, 5120)); // Llama-4-Maverick-17B-128E-PAB-Q8_0
|
||||
test_cases.emplace_back(new test_mul_mat_id(GGML_TYPE_Q8_0, GGML_TYPE_F32, 128, 128, false, 8192, 1, 5120)); // Llama-4-Maverick-17B-128E-PAB-Q8_0
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 8192, 1, 5120, {128, 1}, {1, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 8192, 512, 5120, {128, 1}, {1, 1}));
|
||||
#endif
|
||||
|
||||
for (ggml_type type_a : all_types) {
|
||||
@@ -7678,10 +7533,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 64, 77, 77, {12,1}, {1,1}));
|
||||
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q4_0, GGML_TYPE_F32, 576, 512, 576, {1,1}, {1,1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q4_0, GGML_TYPE_F32, 1, 2048, 8192, {1, 1}, {1, 1}));
|
||||
for (ggml_type type_a : all_types) {
|
||||
test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 1, 64, 256, {1, 1}, {1, 1}));
|
||||
}
|
||||
|
||||
#if 0
|
||||
// test the mat-mat path for Metal
|
||||
@@ -7894,11 +7745,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, true, GGML_TYPE_F32, {1, 1}, 0.1f, 8.0f));
|
||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, true, GGML_TYPE_F16, {1, 1}, 0.1f, 8.0f));
|
||||
|
||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200001, 2, 3, 1}, true, true, GGML_TYPE_F32, {1, 1}, 0.1f, 8.0f));
|
||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200001, 2, 3, 1}, true, true, GGML_TYPE_F16, {1, 1}, 0.1f, 8.0f));
|
||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200000, 1, 1, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
|
||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200000, 4, 1, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
|
||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {643251, 3, 1, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
|
||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200001, 2, 3, 1}, true, true, GGML_TYPE_F32, {1, 1}, 0.1f, 8.0f));
|
||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200001, 2, 3, 1}, true, true, GGML_TYPE_F16, {1, 1}, 0.1f, 8.0f));
|
||||
|
||||
for (float max_bias : {0.0f, 8.0f}) {
|
||||
for (float scale : {1.0f, 0.1f}) {
|
||||
@@ -7927,7 +7775,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_rope(type, {128, 40, 2, 1}, 128, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw)); // llama 13B
|
||||
test_cases.emplace_back(new test_rope(type, {128, 52, 2, 1}, 128, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw)); // llama 30B
|
||||
test_cases.emplace_back(new test_rope(type, {128, 64, 2, 1}, 128, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw)); // llama 65B
|
||||
test_cases.emplace_back(new test_rope(type, {16, 16, 8192, 1}, 16, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw));
|
||||
}
|
||||
|
||||
if (all) {
|
||||
@@ -7942,7 +7789,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 20, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (stablelm)
|
||||
test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 32, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (phi-2)
|
||||
test_cases.emplace_back(new test_rope(type, { 80, 32, 4, 1}, 32, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (phi-2)
|
||||
test_cases.emplace_back(new test_rope(type, { 16, 16, 8192, 1}, 16, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw));
|
||||
}
|
||||
|
||||
if (all) {
|
||||
@@ -7956,7 +7802,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_rope(type, {128, 28, 2, 1}, 32, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw));
|
||||
test_cases.emplace_back(new test_rope(type, { 80, 16, 2, 1}, 80, GGML_ROPE_TYPE_VISION, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen2vl ViT)
|
||||
test_cases.emplace_back(new test_rope(type, {128, 16, 2, 1}, 128, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen3vl)
|
||||
test_cases.emplace_back(new test_rope(type, {16, 16, 8192, 1}, 16, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw));
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_rope(type, { 64, 128, 2, 1}, 64, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (falcon 40B)
|
||||
@@ -8002,11 +7847,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {2, 8, 8192, 1}, order)); // bailingmoe2 (group selection)
|
||||
}
|
||||
|
||||
for (int n = 1; n < 5; ++n) {
|
||||
for (int k = 1; k <= n; ++k) {
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {n, 2, 1, 3}, k, true));
|
||||
}
|
||||
}
|
||||
for (int i = 0; i < 20; ++i) {
|
||||
for (int k : {1, 2, 3, 7, 15, 100, 500, 1023, 9999}) {
|
||||
if (k <= 1<<i) {
|
||||
@@ -8040,9 +7880,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5, 7, 11}, mode));
|
||||
}
|
||||
for (ggml_scale_mode mode : {GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC}) {
|
||||
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
|
||||
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
|
||||
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
|
||||
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
|
||||
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
|
||||
test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_sum());
|
||||
@@ -8094,7 +7934,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_cumsum(GGML_TYPE_F32, { 2048, 5, 4, 3 }));
|
||||
test_cases.emplace_back(new test_cumsum(GGML_TYPE_F32, { 201*1204, 1, 1, 1 }));
|
||||
test_cases.emplace_back(new test_cumsum(GGML_TYPE_F32, { 312*1205, 1, 1, 1 }));
|
||||
test_cases.emplace_back(new test_cumsum(GGML_TYPE_F32, { 20481, 4, 1, 1 }));
|
||||
|
||||
test_cases.emplace_back(new test_xielu());
|
||||
|
||||
@@ -8237,7 +8076,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_topk_moe({71, 22, 1, 1}, 8, with_norm, bias_probs, gate, scale_w));
|
||||
test_cases.emplace_back(new test_topk_moe({128, 1, 1, 1}, 128, with_norm, bias_probs, gate, scale_w));
|
||||
test_cases.emplace_back(new test_topk_moe({129, 1, 1, 1}, 128, with_norm, bias_probs, gate, scale_w));
|
||||
test_cases.emplace_back(new test_topk_moe({160, 4, 1, 1}, 160, with_norm, bias_probs, gate, scale_w));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -8423,12 +8261,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
||||
}
|
||||
}
|
||||
|
||||
for (int col : {8192, 16384, 32768, 65536, 131072, 262144, 524288}) {
|
||||
for (int rows : {1, 4, 16}){
|
||||
test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {col, rows, 1, 1}, false, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
|
||||
}
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, false));
|
||||
test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, true));
|
||||
|
||||
@@ -8472,9 +8304,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
||||
test_cases.emplace_back(new test_sum(GGML_TYPE_F32, it));
|
||||
}
|
||||
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {65000, 16, 1, 1}));
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {200000, 1, 1, 1}));
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {200000, 16, 1, 1}));
|
||||
test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {65000, 16, 1, 1}));
|
||||
|
||||
test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {2, 1, 1, 1}, 1));
|
||||
for (auto k : {1, 10, 40, 400}) {
|
||||
@@ -8485,18 +8315,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
|
||||
}
|
||||
}
|
||||
|
||||
for (auto nrows : {1, 4, 8, 16}) {
|
||||
for (auto cols : {128, 1024, 4096, 8192, 16384, 32768, 65536, 131072, 200000, 2000000}) {
|
||||
test_cases.emplace_back(new test_cumsum(GGML_TYPE_F32, {cols, nrows, 1, 1}));
|
||||
}
|
||||
}
|
||||
|
||||
// Examples from granite-4.0-h-1b/ggml-model-Q8_0.gguf
|
||||
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {515, 3328, 1, 1}, {4, 3328, 1, 1})); // prefill
|
||||
test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 3328, 1, 1}, {4, 3328, 1, 1})); // generate
|
||||
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 512, 1)); // prefill
|
||||
test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 1, 1)); // generate
|
||||
|
||||
|
||||
return test_cases;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user