ggml-cpu: clean up s390x SIMD (#15855)

* ggml-cpu: clean up s390x simd

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
(cherry picked from commit 0da4b6aa07d96b758812d17b2c82267632fa4ba5)
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

* ggml-cpu: fix hsum data types

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>

---------

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
2025-09-08 02:18:28 +08:00
parent c97b5e5854
commit d36e61c580
2 changed files with 63 additions and 60 deletions
--- a/ggml/src/ggml-cpu/ggml-cpu-impl.h
+++ b/ggml/src/ggml-cpu/ggml-cpu-impl.h
@@ -483,11 +483,16 @@ inline static int16x8_t vec_padd_s16(int16x8_t a, int16x8_t b) {
 /**
 * @see https://github.com/ggml-org/llama.cpp/pull/14037
 */
-inline static float vec_hsum(float32x4_t v) {
+inline static float vec_hsum_f32x4(float32x4_t v) { // horizontal sum: reduces the lanes of v to a single float
    float32x4_t v_temp = v + vec_reve(v); // lane-wise add of v and vec_reve(v); assumes vec_reve reverses lane order, giving {v0+v3, v1+v2, ...} - TODO confirm against the s390x vector builtin docs
    return v_temp[0] + v_temp[1]; // adds the first two lanes of v_temp; under the assumption above this equals v0+v1+v2+v3
 }

+inline static int32_t vec_hsum_i32x4(int32x4_t v) { // integer counterpart of the float horizontal sum: reduces the lanes of v to a single int32_t
+    int32x4_t v_temp = v + vec_reve(v); // lane-wise add of v and vec_reve(v); assumes vec_reve reverses lane order, giving {v0+v3, v1+v2, ...} - TODO confirm against the s390x vector builtin docs
+    return v_temp[0] + v_temp[1]; // adds the first two lanes of v_temp; under the assumption above this equals v0+v1+v2+v3
+}
+
 inline static int32x4_t ggml_vec_dot(int32x4_t acc, int8x16_t a, int8x16_t b) {
    const int16x8_t p = vec_mule(a, b) + vec_mulo(a, b);
    return acc + (vec_unpackh(p) + vec_unpackl(p));