sync from b7516

2026-01-16 11:16:14 +08:00
parent f4ae4cc7da
commit 6ee41dd9e3
380 changed files with 18435 additions and 38806 deletions
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -202,26 +202,15 @@ llama_build_and_test(
 llama_build_and_test(test-regex-partial.cpp)

 if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
-    set(MODEL_NAME "tinyllamas/stories15M-q4_0.gguf")
-    set(MODEL_HASH "SHA256=66967fbece6dbe97886593fdbb73589584927e29119ec31f08090732d1861739")
+    llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
 else()
-    set(MODEL_NAME "tinyllamas/stories15M-be.Q4_0.gguf")
-    set(MODEL_HASH "SHA256=9aec857937849d976f30397e97eb1cabb53eb9dcb1ce4611ba8247fb5f44c65d")
+    llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-be.Q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
 endif()
-set(MODEL_DEST "${CMAKE_BINARY_DIR}/${MODEL_NAME}")

-add_test(NAME test-download-model COMMAND ${CMAKE_COMMAND}
-    -DDEST=${MODEL_DEST}
-    -DNAME=${MODEL_NAME}
-    -DHASH=${MODEL_HASH}
-    -P ${CMAKE_SOURCE_DIR}/cmake/download-models.cmake
-)
-set_tests_properties(test-download-model PROPERTIES FIXTURES_SETUP test-download-model)
-
-llama_build_and_test(test-thread-safety.cpp ARGS -m "${MODEL_DEST}" -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
-set_tests_properties(test-thread-safety PROPERTIES FIXTURES_REQUIRED test-download-model)
-
-llama_build_and_test(test-arg-parser.cpp)
+# this fails on windows (github hosted runner) because the curl DLL cannot be found (exit code 0xc0000135, STATUS_DLL_NOT_FOUND)
+if (NOT WIN32)
+    llama_build_and_test(test-arg-parser.cpp)
+endif()

 if (NOT LLAMA_SANITIZE_ADDRESS AND NOT GGML_SCHED_NO_REALLOC)
  # TODO: repair known memory leaks
@@ -230,14 +219,16 @@ endif()
 llama_build_and_test(test-gguf.cpp)
 llama_build_and_test(test-backend-ops.cpp)

-llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
-llama_build_and_test(test-autorelease.cpp       LABEL "model")
-llama_build_and_test(test-backend-sampler.cpp   LABEL "model")
+llama_build_and_test(test-model-load-cancel.cpp  LABEL "model")
+llama_build_and_test(test-autorelease.cpp        LABEL "model")

 # Test for state restore with fragmented KV cache
 # Requires a model, uses same args pattern as test-thread-safety
-llama_build_and_test(test-state-restore-fragmented.cpp LABEL "model" ARGS -m "${MODEL_DEST}")
-set_tests_properties(test-state-restore-fragmented PROPERTIES FIXTURES_REQUIRED test-download-model)
+if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
+    llama_build_and_test(test-state-restore-fragmented.cpp LABEL "model" ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf)
+else()
+    llama_build_and_test(test-state-restore-fragmented.cpp LABEL "model" ARGS -hf ggml-org/models -hff tinyllamas/stories15M-be.Q4_0.gguf)
+endif()

 if (NOT GGML_BACKEND_DL)
    # these tests use the backends directly and cannot be built with dynamic loading
--- a/tests/test-arg-parser.cpp
+++ b/tests/test-arg-parser.cpp
@@ -1,6 +1,5 @@
 #include "arg.h"
 #include "common.h"
-#include "download.h"

 #include <string>
 #include <vector>
@@ -128,15 +127,6 @@ int main(void) {
    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_SPECULATIVE));
    assert(params.speculative.n_max == 123);

-    // multi-value args (CSV)
-    argv = {"binary_name", "--lora", "file1.gguf,\"file2,2.gguf\",\"file3\"\"3\"\".gguf\",file4\".gguf"};
-    assert(true == common_params_parse(argv.size(), list_str_to_char(argv).data(), params, LLAMA_EXAMPLE_COMMON));
-    assert(params.lora_adapters.size() == 4);
-    assert(params.lora_adapters[0].path == "file1.gguf");
-    assert(params.lora_adapters[1].path == "file2,2.gguf");
-    assert(params.lora_adapters[2].path == "file3\"3\".gguf");
-    assert(params.lora_adapters[3].path == "file4\".gguf");
-
 // skip this part on windows, because setenv is not supported
 #ifdef _WIN32
    printf("test-arg-parser: skip on windows build\n");
@@ -173,7 +163,7 @@ int main(void) {
    assert(params.cpuparams.n_threads == 1010);
 #endif // _WIN32

-    printf("test-arg-parser: test download functions\n\n");
+    printf("test-arg-parser: test curl-related functions\n\n");
    const char * GOOD_URL = "http://ggml.ai/";
    const char * BAD_URL  = "http://ggml.ai/404";

--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -402,20 +402,12 @@ static std::string var_to_str(ggml_op_pool pool) {
 }

 static std::string var_to_str(ggml_scale_mode mode) {
-    std::string str;
-    switch (mode & 0xFF) {
-        case GGML_SCALE_MODE_NEAREST:  str = "nearest"; break;
-        case GGML_SCALE_MODE_BILINEAR: str = "bilinear"; break;
-        case GGML_SCALE_MODE_BICUBIC:  str = "bicubic"; break;
-        default:                       str = std::to_string(mode); break;
+    switch (mode) {
+        case GGML_SCALE_MODE_NEAREST:  return "nearest";
+        case GGML_SCALE_MODE_BILINEAR: return "bilinear";
+        case GGML_SCALE_MODE_BICUBIC:  return "bicubic";
+        default:                       return std::to_string(mode);
    }
-    if (mode & GGML_SCALE_FLAG_ALIGN_CORNERS) {
-        str += "|align_corners";
-    }
-    if (mode & GGML_SCALE_FLAG_ANTIALIAS) {
-        str += "|antialias";
-    }
-    return str;
 }

 #define VAR_TO_STR(x) (#x "=" + var_to_str(x))
@@ -454,28 +446,6 @@ static bool ggml_is_view_op(enum ggml_op op) {
    return op == GGML_OP_VIEW || op == GGML_OP_RESHAPE || op == GGML_OP_PERMUTE || op == GGML_OP_TRANSPOSE;
 }

-static bool backend_has_feature(ggml_backend_t backend, const char * feature_name) {
-    ggml_backend_dev_t dev = ggml_backend_get_device(backend);
-    ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
-
-    auto get_features = (ggml_backend_get_features_t) ggml_backend_reg_get_proc_address(reg, "ggml_backend_get_features");
-    if (!get_features) {
-        return false;
-    }
-
-    const ggml_backend_feature * features = get_features(reg);
-    if (!features) {
-        return false;
-    }
-
-    for (const ggml_backend_feature * f = features; f->name; ++f) {
-        if (strcmp(f->name, feature_name) == 0 && strcmp(f->value, "1") == 0) {
-            return true;
-        }
-    }
-    return false;
-}
-
 enum test_mode {
    MODE_TEST,
    MODE_PERF,
@@ -1123,11 +1093,6 @@ struct test_case {
        return 1e-7;
    }

-    virtual double max_nmse_err(ggml_backend_t backend) {
-        GGML_UNUSED(backend);
-        return max_nmse_err();
-    }
-
    virtual double max_maa_err() {
        return 1e-4;
    }
@@ -1136,10 +1101,6 @@ struct test_case {
        return max_nmse_err();
    }

-    virtual double max_err(ggml_backend_t backend) {
-        return max_nmse_err(backend);
-    }
-
    virtual double err(const float * a, const float * b, size_t n) {
        return nmse(a, b, n);
    }
@@ -1189,7 +1150,6 @@ struct test_case {
    }

    virtual bool run_whole_graph() { return false; }
-    virtual std::vector<ggml_tensor *> fusion_test_nodes() { return {}; }

    ggml_cgraph * gf = nullptr;
    ggml_cgraph * gb = nullptr;
@@ -1409,8 +1369,8 @@ struct test_case {
            }

            double err = ud->tc->err(f1.data(), f2.data(), f1.size());
-            if (err > ud->tc->max_err(ud->backend1)) {
-                printf("[%s] ERR = %.9f > %.9f ", ggml_op_desc(t1), err, ud->tc->max_err(ud->backend1));
+            if (err > ud->tc->max_err()) {
+                printf("[%s] ERR = %.9f > %.9f ", ggml_op_desc(t1), err, ud->tc->max_err());
                //for (int i = 0; i < (int) f1.size(); i++) {
                //    printf("%5d %9.6f %9.6f, diff = %9.6f\n", i, f1[i], f2[i], f1[i] - f2[i]);
                //}
@@ -1423,13 +1383,7 @@ struct test_case {
            GGML_UNUSED(index);
        };

-        std::vector<ggml_tensor *> fused_nodes_to_verify = fusion_test_nodes();
-        if (fused_nodes_to_verify.size() == 0 && run_whole_graph()) {
-            fused_nodes_to_verify.push_back(out);
-        }
-        const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud,
-                                                               run_whole_graph() ? fused_nodes_to_verify.data() : nullptr,
-                                                               fused_nodes_to_verify.size());
+        const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud, run_whole_graph() ? out : nullptr);

        ggml_backend_buffer_free(buf);

@@ -3462,65 +3416,6 @@ struct test_rms_norm_mul_add : public test_case {
    }
 };

-// GGML_OP_ADD + GGML_OP_RMS_NORM (fused operation)
-struct test_add_rms_norm : public test_case {
-    const ggml_type type;
-    const std::array<int64_t, 4> ne;
-    const float eps;
-    const bool broadcast;
-
-    std::string op_desc(ggml_tensor * t) override {
-        GGML_UNUSED(t);
-        return "ADD_RMS_NORM";
-    }
-
-    bool run_whole_graph() override { return true; }
-
-    std::string vars() override {
-        return VARS_TO_STR4(type, ne, eps, broadcast);
-    }
-
-    test_add_rms_norm(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {64, 5, 4, 3},
-            float eps = 1e-6f, bool broadcast = false)
-        : type(type), ne(ne), eps(eps), broadcast(broadcast) {}
-
-    ggml_tensor * build_graph(ggml_context * ctx) override {
-        std::array<int64_t, 4> broadcast_dims = {ne[0]*2, ne[1]*3, ne[2]*3, ne[3]*4};
-
-        ggml_tensor * a = ggml_new_tensor(ctx, type, 4, broadcast ? broadcast_dims.data() : ne.data());
-        ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
-
-        ggml_set_param(a);
-        ggml_set_name(a, "a");
-        ggml_set_param(b);
-        ggml_set_name(b, "b");
-
-        // ADD operation followed by RMS_NORM
-        ggml_tensor * add_result = ggml_add(ctx, a, b);
-        ggml_set_name(add_result, "add_result");
-
-        ggml_tensor * out = ggml_rms_norm(ctx, add_result, eps);
-        ggml_set_name(out, "out");
-
-        return out;
-    }
-
-    void initialize_tensors(ggml_context * ctx) override {
-        for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
-            init_tensor_uniform(t, -10.f, 10.f);
-        }
-    }
-
-    float grad_eps() override {
-        return 1.0f;
-    }
-
-    bool grad_precise() override {
-        return true;
-    }
-};
-
 // GGML_OP_SSM_CONV
 struct test_ssm_conv : public test_case {
    const ggml_type type;
@@ -3717,14 +3612,6 @@ struct test_mul_mat : public test_case {
        return 5e-4;
    }

-    double max_nmse_err(ggml_backend_t backend) override {
-        // for blackwell we quantize activations to mxfp4 instead of q8_1 so we add higher tolerance
-        if (type_a == GGML_TYPE_MXFP4 && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
-            return 2e-2;
-        }
-        return max_nmse_err();
-    }
-
    int64_t grad_nmax() override {
        return 20000;
    }
@@ -3853,14 +3740,6 @@ struct test_mul_mat_id : public test_case {
        return 5e-4;
    }

-    double max_nmse_err(ggml_backend_t backend) override {
-        // for blackwell we quantize activations to mxfp4 instead of q8_1 so we add higher tolerance
-        if (type_a == GGML_TYPE_MXFP4 && backend_has_feature(backend, "BLACKWELL_NATIVE_FP4")) {
-            return 2e-2;
-        }
-        return max_nmse_err();
-    }
-
    uint64_t op_flops(ggml_tensor * t) override {
        GGML_UNUSED(t);
        return 2 * m * k * n * n_used;
@@ -5293,8 +5172,6 @@ struct test_topk_moe : public test_case {
    const bool bias_probs;
    const MoeGatingFunc gating_func;
    const float scale_w;
-    ggml_tensor * weights {};
-    ggml_tensor * selected_experts {};

    test_topk_moe(std::array<int64_t, 4> ne              = { 10, 5, 1, 1 },
                  int                    n_expert_used   = 1,
@@ -5332,16 +5209,16 @@ struct test_topk_moe : public test_case {

        ggml_tensor * selection_probs = probs;
        if (bias_probs) {
-            ggml_tensor * exp_probs_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ne[0]);
+            ggml_tensor * exp_probs_b = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne.data());
            ggml_set_name(exp_probs_b, "exp_probs_b");
            selection_probs = ggml_add(ctx, probs, exp_probs_b);
            ggml_set_name(selection_probs, "selection_probs");
        }

-        selected_experts = ggml_argsort_top_k(ctx, selection_probs, n_expert_used); // [n_expert_used, n_tokens]
+        ggml_tensor * selected_experts = ggml_argsort_top_k(ctx, selection_probs, n_expert_used); // [n_expert_used, n_tokens]
        ggml_set_name(selected_experts, "selected_experts");

-        weights = ggml_get_rows(ctx, ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
+        ggml_tensor * weights = ggml_get_rows(ctx, ggml_reshape_3d(ctx, probs, 1, n_expert, n_tokens), selected_experts); // [1, n_expert_used, n_tokens]
        ggml_set_name(weights, "weights");

        if (gating_func == GATING_FUNC_SOFTMAX_WEIGHT) {
@@ -5367,21 +5244,6 @@ struct test_topk_moe : public test_case {
        ggml_set_name(weights, "weights");
        return weights;
    }
-    // Verify two outputs
-    std::vector<ggml_tensor *> fusion_test_nodes() override { return { selected_experts, weights }; }
-
-    // allow output in arbitrary order
-    double err(const float * a, const float * b, size_t n) override {
-        std::vector<float> a2(n);
-        std::vector<float> b2(n);
-        for (size_t i = 0; i < n; ++i) {
-            a2[i] = a[i];
-            b2[i] = b[i];
-        }
-        std::sort(a2.begin(), a2.end());
-        std::sort(b2.begin(), b2.end());
-        return nmse(a2.data(), b2.data(), n);
-    }
 };

 struct test_mul_mat_vec_fusion : public test_case {
@@ -5673,16 +5535,18 @@ struct test_interpolate : public test_case {
    const ggml_type type;
    const std::array<int64_t, 4> ne;
    const std::array<int64_t, 4> ne_tgt;
-    const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST;
+    const uint32_t mode = GGML_SCALE_MODE_NEAREST;

    std::string vars() override {
-        return VARS_TO_STR4(type, ne, ne_tgt, mode);
+        ggml_scale_mode mode = (ggml_scale_mode)(this->mode & 0xFF);
+        std::string flags = (this->mode & GGML_SCALE_FLAG_ALIGN_CORNERS) ? "align_corners" : "none";
+        return VARS_TO_STR5(type, ne, ne_tgt, mode, flags);
    }

    test_interpolate(ggml_type type = GGML_TYPE_F32,
            std::array<int64_t, 4> ne     = {2, 5,  7, 11},
            std::array<int64_t, 4> ne_tgt = {5, 7, 11, 13},
-            ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST)
+            uint32_t mode = GGML_SCALE_MODE_NEAREST)
        : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {}

    ggml_tensor * build_graph(ggml_context * ctx) override {
@@ -7482,35 +7346,28 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    test_cases.emplace_back(new test_softcap(GGML_TYPE_F32, {10, 10, 10, 10}, 50.0f));
    test_cases.emplace_back(new test_silu_back());

-    for (float eps : { 0.0f, 1e-6f, 1e-4f, 1e-1f }) {
-        for (uint32_t n : { 64, 1025 }) {
-            for (bool v : { false, true }) {
-                test_cases.emplace_back(new test_norm(GGML_TYPE_F32, { n, 5, 4, 3 }, v, eps));
-                test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, { n, 5, 4, 3 }, v, eps));
-            }
-            test_cases.emplace_back(new test_rms_norm_back(GGML_TYPE_F32, { n, 5, 4, 3 }, eps));
-            test_cases.emplace_back(new test_l2_norm(GGML_TYPE_F32, { n, 5, 4, 3 }, eps));
+    for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f}) {
+        for (bool v : {false, true}) {
+            test_cases.emplace_back(new test_norm    (GGML_TYPE_F32, {64, 5, 4, 3}, v, eps));
+            test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 5, 4, 3}, v, eps));
        }
+        test_cases.emplace_back(new test_rms_norm_back(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
+        test_cases.emplace_back(new test_l2_norm      (GGML_TYPE_F32, {64, 5, 4, 3}, eps));
    }

    // in-place tests
    test_cases.emplace_back(new test_rms_norm(GGML_TYPE_F32, {64, 5, 4, 3}, false, 1e-6f, true));

-    for (float eps : { 0.0f, 1e-6f, 1e-4f, 1e-1f, 1.0f }) {
-        for (uint32_t n : { 64, 1025 }) {
-            test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, false));
-            test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, true));
-            test_cases.emplace_back(new test_norm_mul_add(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, false));
-            test_cases.emplace_back(new test_norm_mul_add(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, true));
-            test_cases.emplace_back(new test_add_rms_norm(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, false));
-            test_cases.emplace_back(new test_add_rms_norm(GGML_TYPE_F32, { n, 5, 4, 3 }, eps, true));
-        }
+    for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f, 1.0f}) {
+        test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps, false));
+        test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps, true));
+        test_cases.emplace_back(new test_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps, false));
+        test_cases.emplace_back(new test_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps, true));
    }
    for (uint32_t n : {1, 511, 1025, 8192, 33*512}) {
        for (bool multi_add : {false, true}) {
            test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, {n, 1, 1, 1}, 1e-6f, false, multi_add));
        }
-        test_cases.emplace_back(new test_add_rms_norm(GGML_TYPE_F32, {n, 1, 1, 1}, 1e-6f, false));
    }

    for (auto multi_add : {false, true}) {
@@ -7528,6 +7385,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
            }
        }
    }
+
+    test_cases.emplace_back(new test_l2_norm(GGML_TYPE_F32, {64, 5, 4, 3}, 1e-12f));
+
    for (int64_t d_conv : {3, 4, 9}) {
        for (int64_t d_inner: {1024, 1536, 2048}) {
            test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {d_conv, d_inner, 1, 1}, {d_conv, d_inner, 1, 1}));
@@ -7561,11 +7421,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 1700000, 96, 2592, {1, 1}, {1, 1}));
    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 1700000,  3, 2592, {1, 1}, {1, 1}));
    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 1700000,  1, 2592, {1, 1}, {1, 1}));
-
-    test_cases.emplace_back(new test_mul_mat_id(GGML_TYPE_Q8_0, GGML_TYPE_F32, 128, 128, false, 8192, 2, 5120)); // Llama-4-Maverick-17B-128E-PAB-Q8_0
-    test_cases.emplace_back(new test_mul_mat_id(GGML_TYPE_Q8_0, GGML_TYPE_F32, 128, 128, false, 8192, 1, 5120)); // Llama-4-Maverick-17B-128E-PAB-Q8_0
-    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 8192, 1, 5120, {128, 1}, {1, 1}));
-    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q8_0, GGML_TYPE_F32, 8192, 512, 5120, {128, 1}, {1, 1}));
 #endif

    for (ggml_type type_a : all_types) {
@@ -7678,10 +7533,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F32, GGML_TYPE_F32, 64, 77, 77, {12,1}, {1,1}));

    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q4_0, GGML_TYPE_F32, 576, 512, 576, {1,1}, {1,1}));
-    test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q4_0, GGML_TYPE_F32, 1, 2048, 8192, {1,  1}, {1, 1}));
-    for (ggml_type type_a : all_types) {
-        test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 1, 64, 256, {1,  1}, {1, 1}));
-    }

 #if 0
    // test the mat-mat path for Metal
@@ -7894,11 +7745,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true,  true,  GGML_TYPE_F32, {1, 1}, 0.1f, 8.0f));
    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true,  true,  GGML_TYPE_F16, {1, 1}, 0.1f, 8.0f));

-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200001, 2, 3, 1}, true,   true,  GGML_TYPE_F32, {1, 1}, 0.1f, 8.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200001, 2, 3, 1}, true,   true,  GGML_TYPE_F16, {1, 1}, 0.1f, 8.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200000, 1, 1, 1}, false,  false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200000, 4, 1, 1}, false,  false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
-    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {643251, 3, 1, 1}, false,  false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200001, 2, 3, 1}, true,  true,  GGML_TYPE_F32, {1, 1}, 0.1f, 8.0f));
+    test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {200001, 2, 3, 1}, true,  true,  GGML_TYPE_F16, {1, 1}, 0.1f, 8.0f));

    for (float max_bias : {0.0f, 8.0f}) {
        for (float scale : {1.0f, 0.1f}) {
@@ -7927,7 +7775,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
                                    test_cases.emplace_back(new test_rope(type, {128,  40, 2, 1}, 128, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw)); // llama 13B
                                    test_cases.emplace_back(new test_rope(type, {128,  52, 2, 1}, 128, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw)); // llama 30B
                                    test_cases.emplace_back(new test_rope(type, {128,  64, 2, 1}, 128, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw)); // llama 65B
-                                    test_cases.emplace_back(new test_rope(type, {16, 16, 8192, 1}, 16, GGML_ROPE_TYPE_NORMAL, 512, fs, ef, af, ff, v, fw));
                                }

                                if (all) {
@@ -7942,7 +7789,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
                                    test_cases.emplace_back(new test_rope(type, { 80,  32, 2, 1},  20, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (stablelm)
                                    test_cases.emplace_back(new test_rope(type, { 80,  32, 2, 1},  32, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (phi-2)
                                    test_cases.emplace_back(new test_rope(type, { 80,  32, 4, 1},  32, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (phi-2)
-                                    test_cases.emplace_back(new test_rope(type, { 16, 16, 8192, 1},  16, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw));
                                }

                                if (all) {
@@ -7956,7 +7802,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
                                    test_cases.emplace_back(new test_rope(type, {128,  28, 2, 1},  32, GGML_ROPE_TYPE_IMROPE,  512, fs, ef, af, ff, v, fw));
                                    test_cases.emplace_back(new test_rope(type, { 80,  16, 2, 1},  80, GGML_ROPE_TYPE_VISION, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen2vl ViT)
                                    test_cases.emplace_back(new test_rope(type, {128,  16, 2, 1}, 128, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen3vl)
-                                    test_cases.emplace_back(new test_rope(type, {16, 16, 8192, 1}, 16, GGML_ROPE_TYPE_IMROPE, 512, fs, ef, af, ff, v, fw));
                                }

                                test_cases.emplace_back(new test_rope(type, { 64, 128, 2, 1},  64, GGML_ROPE_TYPE_NEOX, 512, fs, ef, af, ff, v, fw)); // neox (falcon 40B)
@@ -8002,11 +7847,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
        test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {2, 8, 8192, 1}, order)); // bailingmoe2 (group selection)
    }

-    for (int n = 1; n < 5; ++n) {
-        for (int k = 1; k <= n; ++k) {
-            test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {n, 2, 1, 3}, k, true));
-        }
-    }
    for (int i = 0; i < 20; ++i) {
        for (int k : {1, 2, 3, 7, 15, 100, 500, 1023, 9999}) {
            if (k <= 1<<i) {
@@ -8040,9 +7880,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5,  7, 11}, mode));
    }
    for (ggml_scale_mode mode : {GGML_SCALE_MODE_BILINEAR, GGML_SCALE_MODE_BICUBIC}) {
-        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
-        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
-        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, (ggml_scale_mode)(mode | GGML_SCALE_FLAG_ALIGN_CORNERS)));
+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {1, 4, 3, 2}, {2, 8, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
+        test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {4, 1, 3, 2}, {1, 1, 3, 2}, mode | GGML_SCALE_FLAG_ALIGN_CORNERS));
    }

    test_cases.emplace_back(new test_sum());
@@ -8094,7 +7934,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
    test_cases.emplace_back(new test_cumsum(GGML_TYPE_F32, { 2048, 5, 4, 3 }));
    test_cases.emplace_back(new test_cumsum(GGML_TYPE_F32, { 201*1204, 1, 1, 1 }));
    test_cases.emplace_back(new test_cumsum(GGML_TYPE_F32, { 312*1205, 1, 1, 1 }));
-    test_cases.emplace_back(new test_cumsum(GGML_TYPE_F32, { 20481, 4, 1, 1 }));

    test_cases.emplace_back(new test_xielu());

@@ -8237,7 +8076,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
                    test_cases.emplace_back(new test_topk_moe({71, 22, 1, 1}, 8, with_norm, bias_probs, gate, scale_w));
                    test_cases.emplace_back(new test_topk_moe({128, 1, 1, 1}, 128, with_norm, bias_probs, gate, scale_w));
                    test_cases.emplace_back(new test_topk_moe({129, 1, 1, 1}, 128, with_norm, bias_probs, gate, scale_w));
-                    test_cases.emplace_back(new test_topk_moe({160, 4, 1, 1}, 160, with_norm, bias_probs, gate, scale_w));
                }
            }
        }
@@ -8423,12 +8261,6 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
        }
    }

-    for (int col : {8192, 16384, 32768, 65536, 131072, 262144, 524288}) {
-        for (int rows : {1, 4, 16}){
-            test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {col, rows, 1, 1}, false,  false,  GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f));
-        }
-    }
-
    test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, false));
    test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, true));

@@ -8472,9 +8304,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
        test_cases.emplace_back(new test_sum(GGML_TYPE_F32, it));
    }

-    test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {65000,  16, 1, 1}));
-    test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {200000, 1,  1, 1}));
-    test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {200000, 16, 1, 1}));
+    test_cases.emplace_back(new test_argsort(GGML_TYPE_F32, {65000, 16, 1, 1}));

    test_cases.emplace_back(new test_top_k(GGML_TYPE_F32, {2, 1, 1, 1}, 1));
    for (auto k : {1, 10, 40, 400}) {
@@ -8485,18 +8315,13 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_perf() {
        }
    }

-    for (auto nrows : {1, 4, 8, 16}) {
-        for (auto cols : {128, 1024, 4096, 8192, 16384, 32768, 65536, 131072, 200000, 2000000}) {
-            test_cases.emplace_back(new test_cumsum(GGML_TYPE_F32, {cols, nrows, 1, 1}));
-        }
-    }
-
    // Examples from granite-4.0-h-1b/ggml-model-Q8_0.gguf
    test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {515, 3328, 1, 1}, {4, 3328, 1, 1})); // prefill
    test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4,   3328, 1, 1}, {4, 3328, 1, 1})); // generate
    test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 512, 1)); // prefill
    test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 48, 1, 1,   1)); // generate

+
    return test_cases;
 }

--- a/tests/test-backend-sampler.cpp
+++ b/tests/test-backend-sampler.cpp
--- a/tests/test-chat.cpp
+++ b/tests/test-chat.cpp
@@ -650,7 +650,7 @@ static void test_msgs_oaicompat_json_conversion() {
            "[\n"
            "  {\n"
            "    \"role\": \"assistant\",\n"
-            "    \"content\": \"\",\n"
+            "    \"content\": null,\n"
            "    \"tool_calls\": [\n"
            "      {\n"
            "        \"type\": \"function\",\n"
@@ -724,30 +724,6 @@ static void test_tools_oaicompat_json_conversion() {
            "]"
        ),
        common_chat_tools_to_json_oaicompat<json>({special_function_tool}).dump(2));
-
-    {
-        auto tools_no_params = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func", "description": "A test"}}])"));
-        assert_equals((size_t) 1, tools_no_params.size());
-        assert_equals(std::string("test_func"), tools_no_params[0].name);
-        assert_equals(std::string("A test"), tools_no_params[0].description);
-        assert_equals(std::string("{}"), tools_no_params[0].parameters);
-    }
-    {
-        auto tools_no_desc = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func", "parameters": {"type": "object"}}}])"));
-        assert_equals((size_t) 1, tools_no_desc.size());
-        assert_equals(std::string("test_func"), tools_no_desc[0].name);
-        assert_equals(std::string(""), tools_no_desc[0].description);
-    }
-    {
-        auto tools_minimal = common_chat_tools_parse_oaicompat(json::parse(
-            R"([{"type": "function", "function": {"name": "test_func"}}])"));
-        assert_equals((size_t) 1, tools_minimal.size());
-        assert_equals(std::string("test_func"), tools_minimal[0].name);
-        assert_equals(std::string(""), tools_minimal[0].description);
-        assert_equals(std::string("{}"), tools_minimal[0].parameters);
-    }
 }

 static void test_template_output_parsers() {
@@ -930,8 +906,7 @@ static void test_template_output_parsers() {
                      "      },\n"
                      "      \"id\": \"123456789\"\n"
                      "    }\n"
-                      "  ],\n"
-                      "  \"content\": \"\"\n"
+                      "  ]\n"
                      "}");
    }
    {
@@ -1738,8 +1713,7 @@ static void test_template_output_parsers() {
                      "      },\n"
                      "      \"id\": \"123456789\"\n"
                      "    }\n"
-                      "  ],\n"
-                      "  \"content\": \"\"\n"
+                      "  ]\n"
                      "}",
                      /* expect_grammar_triggered= */ false
        );
--- a/tests/test-regex-partial.cpp
+++ b/tests/test-regex-partial.cpp
@@ -232,52 +232,52 @@ static void test_regex_to_reversed_partial_regex() {
    printf("[%s]\n", __func__);

    assert_equals<std::string>(
-        "^((?:(?:c)?b)?a)",
+        "((?:(?:c)?b)?a)[\\s\\S]*",
        regex_to_reversed_partial_regex("abc"));

    assert_equals<std::string>(
-        "^(a+)",
+        "(a+)[\\s\\S]*",
        regex_to_reversed_partial_regex("a+"));

    assert_equals<std::string>(
-        "^(a*)",
+        "(a*)[\\s\\S]*",
        regex_to_reversed_partial_regex("a*"));

    assert_equals<std::string>(
-        "^(a?)",
+        "(a?)[\\s\\S]*",
        regex_to_reversed_partial_regex("a?"));

    assert_equals<std::string>(
-        "^([a-z])",
+        "([a-z])[\\s\\S]*",
        regex_to_reversed_partial_regex("[a-z]"));

    assert_equals<std::string>(
-        "^((?:\\w+)?[a-z])",
+        "((?:\\w+)?[a-z])[\\s\\S]*",
        regex_to_reversed_partial_regex("[a-z]\\w+"));

    assert_equals<std::string>(
-        "^((?:a|b))",
+        "((?:a|b))[\\s\\S]*",
        regex_to_reversed_partial_regex("(?:a|b)"));
    assert_equals<std::string>(
-        "^((?:(?:(?:d)?c)?b)?a)",
+        "((?:(?:(?:d)?c)?b)?a)[\\s\\S]*",
        regex_to_reversed_partial_regex("abcd"));
    assert_equals<std::string>(
-        "^((?:b)?a*)", // TODO: ((?:b)?a*+).* ??
+        "((?:b)?a*)[\\s\\S]*", // TODO: ((?:b)?a*+).* ??
        regex_to_reversed_partial_regex("a*b"));
    assert_equals<std::string>(
-        "^((?:(?:b)?a)?.*)",
+        "((?:(?:b)?a)?.*)[\\s\\S]*",
        regex_to_reversed_partial_regex(".*?ab"));
    assert_equals<std::string>(
-        "^((?:(?:b)?.*)?a)",
+        "((?:(?:b)?.*)?a)[\\s\\S]*",
        regex_to_reversed_partial_regex("a.*?b"));
    assert_equals<std::string>(
-        "^((?:(?:d)?(?:(?:c)?b))?a)",
+        "((?:(?:d)?(?:(?:c)?b))?a)[\\s\\S]*",
        regex_to_reversed_partial_regex("a(bc)d"));
    assert_equals<std::string>(
-        "^((?:(?:(?:c)?b|(?:e)?d))?a)",
+        "((?:(?:(?:c)?b|(?:e)?d))?a)[\\s\\S]*",
        regex_to_reversed_partial_regex("a(bc|de)"));
    assert_equals<std::string>(
-        "^((?:(?:(?:(?:(?:c)?b?)?b?)?b)?b)?a)",
+        "((?:(?:(?:(?:(?:c)?b?)?b?)?b)?b)?a)[\\s\\S]*",
        regex_to_reversed_partial_regex("ab{2,4}c"));
 }