sync from b7516

2026-01-16 11:16:14 +08:00
parent f4ae4cc7da
commit 6ee41dd9e3
380 changed files with 18435 additions and 38806 deletions
--- a/scripts/debug-test.sh
+++ b/scripts/debug-test.sh
@@ -109,7 +109,8 @@ rm -rf "$build_dir" && mkdir "$build_dir" || abort "Failed to make $build_dir"
 # Step 2: Setup Build Environment and Compile Test Binaries
 ###########################################################

-cmake -B "./$build_dir" -DCMAKE_BUILD_TYPE=Debug -DGGML_CUDA=1 || abort "Failed to build environment"
+# Note: test-eval-callback requires -DLLAMA_CURL
+cmake -B "./$build_dir" -DCMAKE_BUILD_TYPE=Debug -DGGML_CUDA=1 -DLLAMA_CURL=1 || abort "Failed to build environment"
 pushd "$build_dir"
 make -j || abort "Failed to compile"
 popd > /dev/null || exit 1
--- a/scripts/pr2wt.sh
+++ b/scripts/pr2wt.sh
@@ -1,79 +0,0 @@
-#!/usr/bin/env bash
-
-# intialize a new worktree from a PR number:
-#
-# - creates a new remote using the fork's clone URL
-# - creates a local branch tracking the remote branch
-# - creates a new worktree in a parent folder, suffixed with "-pr-$PR"
-#
-# sample usage:
-#   ./scripts/pr2wt.sh 12345
-#   ./scripts/pr2wt.sh 12345 opencode
-#   ./scripts/pr2wt.sh 12345 "cmake -B build && cmake --build build"
-#   ./scripts/pr2wt.sh 12345 "bash -l"
-
-function usage() {
-    echo "usage: $0 <pr_number> [cmd]"
-    exit 1
-}
-
-# check we are in the right directory
-if [[ ! -f "scripts/pr2wt.sh" ]]; then
-    echo "error: this script must be run from the root of the repository"
-    exit 1
-fi
-
-if [[ $# -lt 1 || $# -gt 2 ]]; then
-    usage
-fi
-
-PR=$1
-[[ "$PR" =~ ^[0-9]+$ ]] || { echo "error: PR number must be numeric"; exit 1; }
-
-url_origin=$(git config --get remote.origin.url) || {
-    echo "error: no remote named 'origin' in this repository"
-    exit 1
-}
-
-org_repo=$(echo $url_origin | cut -d/ -f4-)
-org_repo=${org_repo%.git}
-
-echo "org/repo: $org_repo"
-
-meta=$(curl -sSLf -H "Accept: application/vnd.github+json" "https://api.github.com/repos/$org_repo/pulls/$PR")
-
-url_remote=$(echo "$meta" | jq -r '.head.repo.clone_url')
-head_ref=$(echo "$meta" | jq -r '.head.ref')
-
-echo "url:      $url_remote"
-echo "head_ref: $head_ref"
-
-url_remote_cur=$(git config --get "remote.pr/$PR.url" 2>/dev/null || true)
-
-if [[ "$url_remote_cur" != "$url_remote" ]]; then
-    git remote rm  pr/$PR 2> /dev/null
-    git remote add pr/$PR "$url_remote"
-fi
-
-git fetch "pr/$PR" "$head_ref"
-
-dir=$(basename $(pwd))
-
-git branch -D pr/$PR 2> /dev/null
-git worktree add -b pr/$PR ../$dir-pr-$PR pr/$PR/$head_ref 2> /dev/null
-
-wt_path=$(cd ../$dir-pr-$PR && pwd)
-
-echo "git worktree created in $wt_path"
-
-cd $wt_path
-git branch --set-upstream-to=pr/$PR/$head_ref
-git pull   --ff-only || {
-    echo "error: failed to pull pr/$PR"
-    exit 1
-}
-
-if [[ $# -eq 2 ]]; then
-    echo "executing: $2"
-    eval "$2"
-fi
--- a/scripts/serve-static.js
+++ b/scripts/serve-static.js
@@ -4,7 +4,7 @@ const path = require('path');

 // This file is used for testing wasm build from emscripten
 // Example build command:
-// emcmake cmake -B build-wasm -DGGML_WEBGPU=ON -DLLAMA_OPENSSL=OFF
+// emcmake cmake -B build-wasm -DGGML_WEBGPU=ON -DLLAMA_CURL=OFF
 // cmake --build build-wasm --target test-backend-ops -j

 const PORT = 8080;
--- a/scripts/snapdragon/adb/run-bench.sh
+++ b/scripts/snapdragon/adb/run-bench.sh
@@ -10,23 +10,14 @@ branch=.
 adbserial=
 [ "$S" != "" ] && adbserial="-s $S"

-adbhost=
-[ "$H" != "" ] && adbhost="-H $H"
-
 model="Llama-3.2-3B-Instruct-Q4_0.gguf"
 [ "$M" != "" ] && model="$M"

 device="HTP0"
 [ "$D" != "" ] && device="$D"

-verbose=
-[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"
-
-experimental=
-[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"
-
-profile=
-[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" cli_opts="$cli_opts -v"
+verbose=""
+[ "$V" != "" ] && verbose="$V"

 opmask=
 [ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK"
@@ -37,16 +28,13 @@ nhvx=
 ndev=
 [ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"

-hb=
-[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
-
 set -x

-adb $adbserial $adbhost shell " \
+adb $adbserial shell " \
  cd $basedir;         \
  LD_LIBRARY_PATH=$basedir/$branch/lib   \
  ADSP_LIBRARY_PATH=$basedir/$branch/lib \
-    $ndev $nhvx $opmask $verbose $experimental $profile $hb ./$branch/bin/llama-bench --device $device --mmap 0 -m $basedir/../gguf/$model \
+    $ndev $nhvx $opmask ./$branch/bin/llama-bench --device $device --mmap 0 -m $basedir/../gguf/$model \
        --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1 \
-        --batch-size 128 -ngl 99 $cli_opts $@ \
+        --batch-size 128 -ngl 99 $@ \
 "
--- a/scripts/snapdragon/adb/run-cli.sh
+++ b/scripts/snapdragon/adb/run-cli.sh
@@ -12,26 +12,23 @@ branch=.
 adbserial=
 [ "$S" != "" ] && adbserial="-s $S"

-adbhost=
-[ "$H" != "" ] && adbhost="-H $H"
-
 model="Llama-3.2-3B-Instruct-Q4_0.gguf"
 [ "$M" != "" ] && model="$M"

 device="HTP0"
 [ "$D" != "" ] && device="$D"

+verbose=
+[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V"
+
 experimental=
 [ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"

-verbose=
-[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"
-
 sched=
 [ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"

 profile=
-[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" cli_opts="$cli_opts -v"
+[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1"

 opmask=
 [ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK"
@@ -42,18 +39,15 @@ nhvx=
 ndev=
 [ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"

-hb=
-[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
-
 set -x

-adb $adbserial $adbhost shell " \
+adb $adbserial shell " \
  cd $basedir; ulimit -c unlimited;        \
    LD_LIBRARY_PATH=$basedir/$branch/lib   \
    ADSP_LIBRARY_PATH=$basedir/$branch/lib \
-    $verbose $experimental $sched $opmask $profile $nhvx $ndev $hb \
-      ./$branch/bin/llama-cli --no-mmap -m $basedir/../gguf/$model \
-         --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1           \
-         --ctx-size 8192 --batch-size 128 -fa on \
-         -ngl 99 --device $device $cli_opts $@   \
+    $verbose $experimental $sched $opmask $profile $nhvx $ndev       \
+      ./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model   \
+         --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1             \
+         --ctx-size 8192 --batch-size 128 -ctk q8_0 -ctv q8_0 -fa on \
+         -ngl 99 --device $device $cli_opts $@ \
 "
--- a/scripts/snapdragon/adb/run-completion.sh
+++ b/scripts/snapdragon/adb/run-completion.sh
@@ -1,59 +0,0 @@
-#!/bin/sh
-#
-
-# Basedir on device
-basedir=/data/local/tmp/llama.cpp
-
-cli_opts=
-
-branch=.
-[ "$B" != "" ] && branch=$B
-
-adbserial=
-[ "$S" != "" ] && adbserial="-s $S"
-
-adbhost=
-[ "$H" != "" ] && adbhost="-H $H"
-
-model="Llama-3.2-3B-Instruct-Q4_0.gguf"
-[ "$M" != "" ] && model="$M"
-
-device="HTP0"
-[ "$D" != "" ] && device="$D"
-
-experimental=
-[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"
-
-verbose=
-[ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V" cli_opts="$cli_opts -v"
-
-sched=
-[ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
-
-profile=
-[ "$PROF" != "" ] && profile="GGML_HEXAGON_PROFILE=$PROF GGML_HEXAGON_OPSYNC=1" cli_opts="$cli_opts -v"
-
-opmask=
-[ "$OPMASK" != "" ] && opmask="GGML_HEXAGON_OPMASK=$OPMASK"
-
-nhvx=
-[ "$NHVX" != "" ] && nhvx="GGML_HEXAGON_NHVX=$NHVX"
-
-ndev=
-[ "$NDEV" != "" ] && ndev="GGML_HEXAGON_NDEV=$NDEV"
-
-hb=
-[ "$HB" != "" ] && hb="GGML_HEXAGON_HOSTBUF=$HB"
-
-set -x
-
-adb $adbserial $adbhost shell " \
-  cd $basedir; ulimit -c unlimited;        \
-    LD_LIBRARY_PATH=$basedir/$branch/lib   \
-    ADSP_LIBRARY_PATH=$basedir/$branch/lib \
-    $verbose $experimental $sched $opmask $profile $nhvx $ndev $hb        \
-      ./$branch/bin/llama-completion --no-mmap -m $basedir/../gguf/$model \
-         --poll 1000 -t 6 --cpu-mask 0xfc --cpu-strict 1                  \
-         --ctx-size 8192 --batch-size 128 -fa on \
-         -ngl 99 -no-cnv --device $device $cli_opts $@   \
-"
--- a/scripts/snapdragon/adb/run-mtmd.sh
+++ b/scripts/snapdragon/adb/run-mtmd.sh
@@ -12,9 +12,6 @@ branch=.
 adbserial=
 [ "$S" != "" ] && adbserial="-s $S"

-adbhost=
-[ "$H" != "" ] && adbhost="-H $H"
-
 model="gemma-3-4b-it-Q4_0.gguf"
 [ "$M" != "" ] && model="$M"

@@ -54,7 +51,7 @@ mtmd_backend=

 set -x

-adb $adbserial $adbhost shell " \
+adb $adbserial shell " \
  cd $basedir; ulimit -c unlimited;        \
    LD_LIBRARY_PATH=$basedir/$branch/lib   \
    ADSP_LIBRARY_PATH=$basedir/$branch/lib \
--- a/scripts/snapdragon/adb/run-tool.sh
+++ b/scripts/snapdragon/adb/run-tool.sh
@@ -12,9 +12,6 @@ branch=.
 adbserial=
 [ "$S" != "" ] && adbserial="-s $S"

-adbhost=
-[ "$H" != "" ] && adbhost="-H $H"
-
 device="HTP0"
 [ "$D" != "" ] && device="$D"

@@ -22,7 +19,7 @@ verbose=
 [ "$V" != "" ] && verbose="GGML_HEXAGON_VERBOSE=$V"

 experimental=
-[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"
+[ "$E" != "" ] && experimental="GGML_HEXAGON_EXPERIMENTAL=$E"

 sched=
 [ "$SCHED" != "" ] && sched="GGML_SCHED_DEBUG=2" cli_opts="$cli_opts -v"
@@ -46,7 +43,7 @@ set -x

 tool=$1; shift

-adb $adbserial $adbhost shell " \
+adb $adbserial shell " \
  cd $basedir; ulimit -c unlimited;        \
    LD_LIBRARY_PATH=$basedir/$branch/lib   \
    ADSP_LIBRARY_PATH=$basedir/$branch/lib \
--- a/scripts/sync-ggml.last
+++ b/scripts/sync-ggml.last
@@ -1 +1 @@
-ebc3a0f4a56be1c9424a89fbec09962ac34fde85
+130bc125a88bb57664b88932c48c38a1cb316fac
--- a/scripts/sync_vendor.py
+++ b/scripts/sync_vendor.py
@@ -16,8 +16,7 @@ vendor = {
    # "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.23/miniaudio.h": "vendor/miniaudio/miniaudio.h",
    "https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h",

-    "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/httplib.h": "vendor/cpp-httplib/httplib.h",
-    "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/LICENSE":   "vendor/cpp-httplib/LICENSE",
+    "https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.28.0/httplib.h": "vendor/cpp-httplib/httplib.h",

    "https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h",
 }
--- a/scripts/tool_bench.py
+++ b/scripts/tool_bench.py
@@ -7,7 +7,7 @@

    Simple usage example:

-        cmake -B build && cmake --build build --config Release -j -t llama-server
+        cmake -B build -DLLAMA_CURL=1 && cmake --build build --config Release -j -t llama-server

        export LLAMA_SERVER_BIN_PATH=$PWD/build/bin/llama-server
        export LLAMA_CACHE=${LLAMA_CACHE:-$HOME/Library/Caches/llama.cpp}