Sync from upstream llama.cpp repository
This commit is contained in:
22
examples/gguf-hash/CMakeLists.txt
Normal file
22
examples/gguf-hash/CMakeLists.txt
Normal file
@@ -0,0 +1,22 @@
|
||||
set(TARGET llama-gguf-hash)
|
||||
add_executable(${TARGET} gguf-hash.cpp)
|
||||
install(TARGETS ${TARGET} RUNTIME)
|
||||
|
||||
# clibs dependencies
|
||||
include_directories(deps/)
|
||||
|
||||
add_library(xxhash OBJECT deps/xxhash/xxhash.c deps/xxhash/xxhash.h)
|
||||
target_link_libraries(${TARGET} PRIVATE xxhash)
|
||||
|
||||
add_library(sha1 OBJECT deps/sha1/sha1.c deps/sha1/sha1.h)
|
||||
target_link_libraries(${TARGET} PRIVATE sha1)
|
||||
if (NOT MSVC)
|
||||
# disable warnings in 3rd party code
|
||||
target_compile_options(sha1 PRIVATE -w)
|
||||
endif()
|
||||
|
||||
add_library(sha256 OBJECT deps/sha256/sha256.c deps/sha256/sha256.h)
|
||||
target_link_libraries(${TARGET} PRIVATE sha256)
|
||||
|
||||
target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT})
|
||||
target_compile_features(${TARGET} PRIVATE cxx_std_17)
|
||||
206
examples/gguf-hash/README.md
Normal file
206
examples/gguf-hash/README.md
Normal file
@@ -0,0 +1,206 @@
|
||||
|
||||
# llama-gguf-hash
|
||||
|
||||
CLI to hash GGUF files to detect difference on a per model and per tensor level.
|
||||
|
||||
**Command line options:**
|
||||
|
||||
- `--help`: display help message
|
||||
- `--xxh64`: use xhash 64bit hash mode (default)
|
||||
- `--sha1`: use sha1
|
||||
- `--uuid`: use uuid
|
||||
- `--sha256`: use sha256
|
||||
- `--all`: use all hash
|
||||
- `--no-layer`: exclude per layer hash
|
||||
- `--uuid`: generate UUIDv5 ID
|
||||
- `-c`, `--check <manifest>`: verify against a manifest
|
||||
|
||||
## About
|
||||
|
||||
While most POSIX systems already have hash checking programs like sha256sum, it
|
||||
is designed to check entire files. This is not ideal for our purpose if we want
|
||||
to check for consistency of the tensor data even if the metadata content of the
|
||||
gguf KV store has been updated.
|
||||
|
||||
This program is designed to hash a gguf tensor payload on a 'per tensor layer'
|
||||
in addition to a 'entire tensor model' hash. The intent is that the entire
|
||||
tensor layer can be checked first but if there is any detected inconsistencies,
|
||||
then the per tensor hash can be used to narrow down the specific tensor layer
|
||||
that has inconsistencies.
|
||||
|
||||
For Maintainers:
|
||||
- Detection of tensor inconsistency during development and automated tests
|
||||
- This is served by xxh64 which is fast
|
||||
- This is also served by having per tensor layer to assist in narrowing down
|
||||
the location of the faulty tensor layer
|
||||
- This is also served by sha1 which is much slower but more widely supported
|
||||
|
||||
For Model Creators:
|
||||
- Optional consistent UUID generation based on model tensor content
|
||||
- This is served by UUIDv5 which is useful for databases keys
|
||||
- llama.cpp UUIDv5 Namespace: `ef001206-dadc-5f6d-a15f-3359e577d4e5`
|
||||
- Made via UUIDv5 URL namespace of `en.wikipedia.org/wiki/Llama.cpp`
|
||||
|
||||
For Model Users:
|
||||
- Assurance of tensor layer integrity even if metadata was updated
|
||||
- This is served by sha256 which is still considered very secure as of 2024
|
||||
|
||||
### Design Note
|
||||
|
||||
- The default behavior of this program if no arguments is provided is to hash
|
||||
using xxhash's xxh32 mode because it is very fast and is primarily targeted
|
||||
towards maintainers who may want to use this in automated tests.
|
||||
- xxhash support xxh32 and xxh128 for 32bit hash and 128bit hash respectively
|
||||
however we picked 64bit xxhash as most computers are 64bit as of 2024 and thus
|
||||
would have a better affinity to calculating hash that is 64bit in size.
|
||||
|
||||
## Compile Example
|
||||
|
||||
```bash
|
||||
cmake -B build -DCMAKE_BUILD_TYPE=Debug -DLLAMA_FATAL_WARNINGS=ON
|
||||
make -C build clean
|
||||
make -C build llama-gguf-hash VERBOSE=1
|
||||
./build/bin/llama-gguf-hash test.gguf
|
||||
./build/bin/llama-gguf-hash --xxh64 test.gguf
|
||||
./build/bin/llama-gguf-hash --sha1 test.gguf
|
||||
./build/bin/llama-gguf-hash --uuid test.gguf
|
||||
./build/bin/llama-gguf-hash --sha256 test.gguf
|
||||
```
|
||||
|
||||
## Generation and Verification Example
|
||||
|
||||
To generate we may use this command
|
||||
|
||||
```bash
|
||||
./llama-gguf-hash --all test.gguf > test.gguf.manifest
|
||||
```
|
||||
|
||||
Which would generate a manifest that looks like below, which contains multiple hash type and per tensor layer hashes as well
|
||||
(This excludes UUID as that is an ID not a hash)
|
||||
|
||||
```bash
|
||||
xxh64 f66e9cd66a4396a0 test.gguf:tensor_0
|
||||
sha1 59f79ecefd8125a996fdf419239051a7e99e5f20 test.gguf:tensor_0
|
||||
sha256 c0510d38fa060c46265e0160a85c7243096b01dd31c2f355bdbb5516b20de1bd test.gguf:tensor_0
|
||||
xxh64 7d3a1f9ac04d0537 test.gguf:tensor_1
|
||||
sha1 4765f592eacf096df4628ba59476af94d767080a test.gguf:tensor_1
|
||||
sha256 8514cbcc73692a2c56bd7a33a022edd5ff819614bd23b19915d7224387f397a7 test.gguf:tensor_1
|
||||
xxh64 a0af5d700049693b test.gguf:tensor_2
|
||||
sha1 25cbfbad4513cc348e2c95ebdee69d6ff2fd8753 test.gguf:tensor_2
|
||||
sha256 947e6b36e20f2cc95e1d2ce1c1669d813d574657ac6b5ac5196158d454d35180 test.gguf:tensor_2
|
||||
xxh64 e83fddf559d7b6a6 test.gguf:tensor_3
|
||||
sha1 a9cba73e2d90f2ee3dae2548caa42bef3fe6a96c test.gguf:tensor_3
|
||||
sha256 423b044e016d8ac73c39f23f60bf01bedef5ecb03c0230accd824c91fe86f1a1 test.gguf:tensor_3
|
||||
xxh64 1257733306b7992d test.gguf:tensor_4
|
||||
sha1 d7bc61db93bb685ce9d598da89717c66729b7543 test.gguf:tensor_4
|
||||
sha256 79737cb3912d4201384cf7f16a1a37ff7823f23ea796cb205b6ca361ab9e3ebf test.gguf:tensor_4
|
||||
xxh64 d238d16ba4711e58 test.gguf:tensor_5
|
||||
sha1 0706566c198fe1072f37e0a5135b4b5f23654c52 test.gguf:tensor_5
|
||||
sha256 60949be8298eced0ecdde64487643d018407bd261691e061d9e9c3dbc9fd358b test.gguf:tensor_5
|
||||
xxh64 3fbc3b65ab8c7f39 test.gguf:tensor_6
|
||||
sha1 73922a0727226a409049f6fc3172a52219ca6f00 test.gguf:tensor_6
|
||||
sha256 574f4c46ff384a3b9a225eb955d2a871847a2e8b3fa59387a8252832e92ef7b0 test.gguf:tensor_6
|
||||
xxh64 c22021c29854f093 test.gguf:tensor_7
|
||||
sha1 efc39cece6a951188fc41e354c73bbfe6813d447 test.gguf:tensor_7
|
||||
sha256 4c0410cd3c500f078ae5b21e8dc9eb79e29112713b2ab58a882f82a3868d4d75 test.gguf:tensor_7
|
||||
xxh64 936df61f5d64261f test.gguf:tensor_8
|
||||
sha1 c2490296d789a4f34398a337fed8377d943d9f06 test.gguf:tensor_8
|
||||
sha256 c4401313feeba0261275c3b25bd2d8fe40ce04e0f440c2980ed0e9674c30ff01 test.gguf:tensor_8
|
||||
xxh64 93fd20c64421c081 test.gguf:tensor_9
|
||||
sha1 7047ce1e78437a6884337a3751c7ee0421918a65 test.gguf:tensor_9
|
||||
sha256 23d57cf0d7a6e90b0b3616b41300e0cd354781e812add854a5f95aa55f2bc514 test.gguf:tensor_9
|
||||
xxh64 5a54d3aad816f302 test.gguf
|
||||
sha1 d15be52c4ff213e823cb6dd13af7ee2f978e7042 test.gguf
|
||||
sha256 7dd641b32f59b60dbd4b5420c4b0f6321ccf48f58f6ae201a3dbc4a58a27c6e4 test.gguf
|
||||
```
|
||||
|
||||
We can then use the normal check command which will by default check for the highest security strength hash and verify against that:
|
||||
|
||||
```bash
|
||||
$ ./llama-gguf-hash --check test.gguf.manifest test.gguf
|
||||
manifest test.gguf.manifest sha256 sha1 xxh64
|
||||
sha256 c0510d38fa060c46265e0160a85c7243096b01dd31c2f355bdbb5516b20de1bd test.gguf:tensor_0 - Ok
|
||||
sha256 8514cbcc73692a2c56bd7a33a022edd5ff819614bd23b19915d7224387f397a7 test.gguf:tensor_1 - Ok
|
||||
sha256 947e6b36e20f2cc95e1d2ce1c1669d813d574657ac6b5ac5196158d454d35180 test.gguf:tensor_2 - Ok
|
||||
sha256 423b044e016d8ac73c39f23f60bf01bedef5ecb03c0230accd824c91fe86f1a1 test.gguf:tensor_3 - Ok
|
||||
sha256 79737cb3912d4201384cf7f16a1a37ff7823f23ea796cb205b6ca361ab9e3ebf test.gguf:tensor_4 - Ok
|
||||
sha256 60949be8298eced0ecdde64487643d018407bd261691e061d9e9c3dbc9fd358b test.gguf:tensor_5 - Ok
|
||||
sha256 574f4c46ff384a3b9a225eb955d2a871847a2e8b3fa59387a8252832e92ef7b0 test.gguf:tensor_6 - Ok
|
||||
sha256 4c0410cd3c500f078ae5b21e8dc9eb79e29112713b2ab58a882f82a3868d4d75 test.gguf:tensor_7 - Ok
|
||||
sha256 c4401313feeba0261275c3b25bd2d8fe40ce04e0f440c2980ed0e9674c30ff01 test.gguf:tensor_8 - Ok
|
||||
sha256 23d57cf0d7a6e90b0b3616b41300e0cd354781e812add854a5f95aa55f2bc514 test.gguf:tensor_9 - Ok
|
||||
sha256 7dd641b32f59b60dbd4b5420c4b0f6321ccf48f58f6ae201a3dbc4a58a27c6e4 test.gguf - Ok
|
||||
|
||||
Verification results for test.gguf.manifest - Success
|
||||
```
|
||||
|
||||
Or we may explicitly ask for a faster hash like:
|
||||
|
||||
```bash
|
||||
$ ./llama-gguf-hash --check test.gguf.manifest --xxh64 test.gguf
|
||||
manifest test.gguf.manifest sha256 sha1 xxh64
|
||||
xxh64 f66e9cd66a4396a0 test.gguf:tensor_0 - Ok
|
||||
xxh64 7d3a1f9ac04d0537 test.gguf:tensor_1 - Ok
|
||||
xxh64 a0af5d700049693b test.gguf:tensor_2 - Ok
|
||||
xxh64 e83fddf559d7b6a6 test.gguf:tensor_3 - Ok
|
||||
xxh64 1257733306b7992d test.gguf:tensor_4 - Ok
|
||||
xxh64 d238d16ba4711e58 test.gguf:tensor_5 - Ok
|
||||
xxh64 3fbc3b65ab8c7f39 test.gguf:tensor_6 - Ok
|
||||
xxh64 c22021c29854f093 test.gguf:tensor_7 - Ok
|
||||
xxh64 936df61f5d64261f test.gguf:tensor_8 - Ok
|
||||
xxh64 93fd20c64421c081 test.gguf:tensor_9 - Ok
|
||||
xxh64 5a54d3aad816f302 test.gguf - Ok
|
||||
|
||||
Verification results for test.gguf.manifest - Success
|
||||
```
|
||||
|
||||
Or maybe we want to just check that all the hash is valid:
|
||||
|
||||
```bash
|
||||
$./llama-gguf-hash --check test.gguf.manifest --all test.gguf.manifest
|
||||
manifest test.gguf.manifest sha256 sha1 xxh64
|
||||
xxh64 f66e9cd66a4396a0 test.gguf:tensor_0 - Ok
|
||||
sha1 59f79ecefd8125a996fdf419239051a7e99e5f20 test.gguf:tensor_0 - Ok
|
||||
sha256 c0510d38fa060c46265e0160a85c7243096b01dd31c2f355bdbb5516b20de1bd test.gguf:tensor_0 - Ok
|
||||
xxh64 7d3a1f9ac04d0537 test.gguf:tensor_1 - Ok
|
||||
sha1 4765f592eacf096df4628ba59476af94d767080a test.gguf:tensor_1 - Ok
|
||||
sha256 8514cbcc73692a2c56bd7a33a022edd5ff819614bd23b19915d7224387f397a7 test.gguf:tensor_1 - Ok
|
||||
xxh64 a0af5d700049693b test.gguf:tensor_2 - Ok
|
||||
sha1 25cbfbad4513cc348e2c95ebdee69d6ff2fd8753 test.gguf:tensor_2 - Ok
|
||||
sha256 947e6b36e20f2cc95e1d2ce1c1669d813d574657ac6b5ac5196158d454d35180 test.gguf:tensor_2 - Ok
|
||||
xxh64 e83fddf559d7b6a6 test.gguf:tensor_3 - Ok
|
||||
sha1 a9cba73e2d90f2ee3dae2548caa42bef3fe6a96c test.gguf:tensor_3 - Ok
|
||||
sha256 423b044e016d8ac73c39f23f60bf01bedef5ecb03c0230accd824c91fe86f1a1 test.gguf:tensor_3 - Ok
|
||||
xxh64 1257733306b7992d test.gguf:tensor_4 - Ok
|
||||
sha1 d7bc61db93bb685ce9d598da89717c66729b7543 test.gguf:tensor_4 - Ok
|
||||
sha256 79737cb3912d4201384cf7f16a1a37ff7823f23ea796cb205b6ca361ab9e3ebf test.gguf:tensor_4 - Ok
|
||||
xxh64 d238d16ba4711e58 test.gguf:tensor_5 - Ok
|
||||
sha1 0706566c198fe1072f37e0a5135b4b5f23654c52 test.gguf:tensor_5 - Ok
|
||||
sha256 60949be8298eced0ecdde64487643d018407bd261691e061d9e9c3dbc9fd358b test.gguf:tensor_5 - Ok
|
||||
xxh64 3fbc3b65ab8c7f39 test.gguf:tensor_6 - Ok
|
||||
sha1 73922a0727226a409049f6fc3172a52219ca6f00 test.gguf:tensor_6 - Ok
|
||||
sha256 574f4c46ff384a3b9a225eb955d2a871847a2e8b3fa59387a8252832e92ef7b0 test.gguf:tensor_6 - Ok
|
||||
xxh64 c22021c29854f093 test.gguf:tensor_7 - Ok
|
||||
sha1 efc39cece6a951188fc41e354c73bbfe6813d447 test.gguf:tensor_7 - Ok
|
||||
sha256 4c0410cd3c500f078ae5b21e8dc9eb79e29112713b2ab58a882f82a3868d4d75 test.gguf:tensor_7 - Ok
|
||||
xxh64 936df61f5d64261f test.gguf:tensor_8 - Ok
|
||||
sha1 c2490296d789a4f34398a337fed8377d943d9f06 test.gguf:tensor_8 - Ok
|
||||
sha256 c4401313feeba0261275c3b25bd2d8fe40ce04e0f440c2980ed0e9674c30ff01 test.gguf:tensor_8 - Ok
|
||||
xxh64 93fd20c64421c081 test.gguf:tensor_9 - Ok
|
||||
sha1 7047ce1e78437a6884337a3751c7ee0421918a65 test.gguf:tensor_9 - Ok
|
||||
sha256 23d57cf0d7a6e90b0b3616b41300e0cd354781e812add854a5f95aa55f2bc514 test.gguf:tensor_9 - Ok
|
||||
xxh64 5a54d3aad816f302 test.gguf - Ok
|
||||
sha1 d15be52c4ff213e823cb6dd13af7ee2f978e7042 test.gguf - Ok
|
||||
sha256 7dd641b32f59b60dbd4b5420c4b0f6321ccf48f58f6ae201a3dbc4a58a27c6e4 test.gguf - Ok
|
||||
|
||||
Verification results for test.gguf.manifest - Success
|
||||
```
|
||||
|
||||
|
||||
## Crypto/Hash Libraries Used
|
||||
|
||||
These micro c libraries dependencies was installed via the [clib c package manager](https://github.com/clibs)
|
||||
|
||||
- https://github.com/Cyan4973/xxHash
|
||||
- https://github.com/clibs/sha1/
|
||||
- https://github.com/jb55/sha256.c
|
||||
13
examples/gguf-hash/deps/rotate-bits/package.json
Normal file
13
examples/gguf-hash/deps/rotate-bits/package.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"name": "rotate-bits",
|
||||
"version": "0.1.1",
|
||||
"repo": "jb55/rotate-bits.h",
|
||||
"description": "rotate bits",
|
||||
"keywords": ["rotl", "rotr"],
|
||||
"src": ["rotate-bits.h"],
|
||||
"license": "Public Domain",
|
||||
"development": {
|
||||
"thlorenz/tap.c": "*"
|
||||
}
|
||||
}
|
||||
|
||||
46
examples/gguf-hash/deps/rotate-bits/rotate-bits.h
Normal file
46
examples/gguf-hash/deps/rotate-bits/rotate-bits.h
Normal file
@@ -0,0 +1,46 @@
|
||||
|
||||
|
||||
#ifndef __ROTATE_DEFS_H
|
||||
#define __ROTATE_DEFS_H
|
||||
|
||||
#ifdef _MSC_VER
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#define ROTL32(v, n) _rotl((v), (n))
|
||||
#define ROTL64(v, n) _rotl64((v), (n))
|
||||
|
||||
#define ROTR32(v, n) _rotr((v), (n))
|
||||
#define ROTR64(v, n) _rotr64((v), (n))
|
||||
|
||||
#else
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define U8V(v) ((uint8_t)(v) & 0xFFU)
|
||||
#define U16V(v) ((uint16_t)(v) & 0xFFFFU)
|
||||
#define U32V(v) ((uint32_t)(v) & 0xFFFFFFFFU)
|
||||
#define U64V(v) ((uint64_t)(v) & 0xFFFFFFFFFFFFFFFFU)
|
||||
|
||||
#define ROTL32(v, n) \
|
||||
(U32V((uint32_t)(v) << (n)) | ((uint32_t)(v) >> (32 - (n))))
|
||||
|
||||
// tests fail if we don't have this cast...
|
||||
#define ROTL64(v, n) \
|
||||
(U64V((uint64_t)(v) << (n)) | ((uint64_t)(v) >> (64 - (n))))
|
||||
|
||||
#define ROTR32(v, n) ROTL32(v, 32 - (n))
|
||||
#define ROTR64(v, n) ROTL64(v, 64 - (n))
|
||||
|
||||
#endif
|
||||
|
||||
#define ROTL8(v, n) \
|
||||
(U8V((uint8_t)(v) << (n)) | ((uint8_t)(v) >> (8 - (n))))
|
||||
|
||||
#define ROTL16(v, n) \
|
||||
(U16V((uint16_t)(v) << (n)) | ((uint16_t)(v) >> (16 - (n))))
|
||||
|
||||
#define ROTR8(v, n) ROTL8(v, 8 - (n))
|
||||
#define ROTR16(v, n) ROTL16(v, 16 - (n))
|
||||
|
||||
#endif
|
||||
9
examples/gguf-hash/deps/sha1/package.json
Normal file
9
examples/gguf-hash/deps/sha1/package.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"name": "sha1",
|
||||
"version": "0.0.1",
|
||||
"repo": "clibs/sha1",
|
||||
"description": "sha1 hash algorithm",
|
||||
"keywords": ["sha1", "hash"],
|
||||
"license": "public domain",
|
||||
"src": ["sha1.c", "sha1.h"]
|
||||
}
|
||||
295
examples/gguf-hash/deps/sha1/sha1.c
Normal file
295
examples/gguf-hash/deps/sha1/sha1.c
Normal file
@@ -0,0 +1,295 @@
|
||||
/*
|
||||
SHA-1 in C
|
||||
By Steve Reid <steve@edmweb.com>
|
||||
100% Public Domain
|
||||
|
||||
Test Vectors (from FIPS PUB 180-1)
|
||||
"abc"
|
||||
A9993E36 4706816A BA3E2571 7850C26C 9CD0D89D
|
||||
"abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
|
||||
84983E44 1C3BD26E BAAE4AA1 F95129E5 E54670F1
|
||||
A million repetitions of "a"
|
||||
34AA973C D4C4DAA4 F61EEB2B DBAD2731 6534016F
|
||||
*/
|
||||
|
||||
/* #define LITTLE_ENDIAN * This should be #define'd already, if true. */
|
||||
/* #define SHA1HANDSOFF * Copies data before messing with it. */
|
||||
|
||||
#define SHA1HANDSOFF
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* for uint32_t */
|
||||
#include <stdint.h>
|
||||
|
||||
#include "sha1.h"
|
||||
|
||||
|
||||
#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))
|
||||
|
||||
/* blk0() and blk() perform the initial expand. */
|
||||
/* I got the idea of expanding during the round function from SSLeay */
|
||||
#if BYTE_ORDER == LITTLE_ENDIAN
|
||||
#define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
|
||||
|(rol(block->l[i],8)&0x00FF00FF))
|
||||
#elif BYTE_ORDER == BIG_ENDIAN
|
||||
#define blk0(i) block->l[i]
|
||||
#else
|
||||
#error "Endianness not defined!"
|
||||
#endif
|
||||
#define blk(i) (block->l[i&15] = rol(block->l[(i+13)&15]^block->l[(i+8)&15] \
|
||||
^block->l[(i+2)&15]^block->l[i&15],1))
|
||||
|
||||
/* (R0+R1), R2, R3, R4 are the different operations used in SHA1 */
|
||||
#define R0(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk0(i)+0x5A827999+rol(v,5);w=rol(w,30);
|
||||
#define R1(v,w,x,y,z,i) z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=rol(w,30);
|
||||
#define R2(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=rol(w,30);
|
||||
#define R3(v,w,x,y,z,i) z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=rol(w,30);
|
||||
#define R4(v,w,x,y,z,i) z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=rol(w,30);
|
||||
|
||||
|
||||
/* Hash a single 512-bit block. This is the core of the algorithm. */
|
||||
|
||||
void SHA1Transform(
|
||||
uint32_t state[5],
|
||||
const unsigned char buffer[64]
|
||||
)
|
||||
{
|
||||
uint32_t a, b, c, d, e;
|
||||
|
||||
typedef union
|
||||
{
|
||||
unsigned char c[64];
|
||||
uint32_t l[16];
|
||||
} CHAR64LONG16;
|
||||
|
||||
#ifdef SHA1HANDSOFF
|
||||
CHAR64LONG16 block[1]; /* use array to appear as a pointer */
|
||||
|
||||
memcpy(block, buffer, 64);
|
||||
#else
|
||||
/* The following had better never be used because it causes the
|
||||
* pointer-to-const buffer to be cast into a pointer to non-const.
|
||||
* And the result is written through. I threw a "const" in, hoping
|
||||
* this will cause a diagnostic.
|
||||
*/
|
||||
CHAR64LONG16 *block = (const CHAR64LONG16 *) buffer;
|
||||
#endif
|
||||
/* Copy context->state[] to working vars */
|
||||
a = state[0];
|
||||
b = state[1];
|
||||
c = state[2];
|
||||
d = state[3];
|
||||
e = state[4];
|
||||
/* 4 rounds of 20 operations each. Loop unrolled. */
|
||||
R0(a, b, c, d, e, 0);
|
||||
R0(e, a, b, c, d, 1);
|
||||
R0(d, e, a, b, c, 2);
|
||||
R0(c, d, e, a, b, 3);
|
||||
R0(b, c, d, e, a, 4);
|
||||
R0(a, b, c, d, e, 5);
|
||||
R0(e, a, b, c, d, 6);
|
||||
R0(d, e, a, b, c, 7);
|
||||
R0(c, d, e, a, b, 8);
|
||||
R0(b, c, d, e, a, 9);
|
||||
R0(a, b, c, d, e, 10);
|
||||
R0(e, a, b, c, d, 11);
|
||||
R0(d, e, a, b, c, 12);
|
||||
R0(c, d, e, a, b, 13);
|
||||
R0(b, c, d, e, a, 14);
|
||||
R0(a, b, c, d, e, 15);
|
||||
R1(e, a, b, c, d, 16);
|
||||
R1(d, e, a, b, c, 17);
|
||||
R1(c, d, e, a, b, 18);
|
||||
R1(b, c, d, e, a, 19);
|
||||
R2(a, b, c, d, e, 20);
|
||||
R2(e, a, b, c, d, 21);
|
||||
R2(d, e, a, b, c, 22);
|
||||
R2(c, d, e, a, b, 23);
|
||||
R2(b, c, d, e, a, 24);
|
||||
R2(a, b, c, d, e, 25);
|
||||
R2(e, a, b, c, d, 26);
|
||||
R2(d, e, a, b, c, 27);
|
||||
R2(c, d, e, a, b, 28);
|
||||
R2(b, c, d, e, a, 29);
|
||||
R2(a, b, c, d, e, 30);
|
||||
R2(e, a, b, c, d, 31);
|
||||
R2(d, e, a, b, c, 32);
|
||||
R2(c, d, e, a, b, 33);
|
||||
R2(b, c, d, e, a, 34);
|
||||
R2(a, b, c, d, e, 35);
|
||||
R2(e, a, b, c, d, 36);
|
||||
R2(d, e, a, b, c, 37);
|
||||
R2(c, d, e, a, b, 38);
|
||||
R2(b, c, d, e, a, 39);
|
||||
R3(a, b, c, d, e, 40);
|
||||
R3(e, a, b, c, d, 41);
|
||||
R3(d, e, a, b, c, 42);
|
||||
R3(c, d, e, a, b, 43);
|
||||
R3(b, c, d, e, a, 44);
|
||||
R3(a, b, c, d, e, 45);
|
||||
R3(e, a, b, c, d, 46);
|
||||
R3(d, e, a, b, c, 47);
|
||||
R3(c, d, e, a, b, 48);
|
||||
R3(b, c, d, e, a, 49);
|
||||
R3(a, b, c, d, e, 50);
|
||||
R3(e, a, b, c, d, 51);
|
||||
R3(d, e, a, b, c, 52);
|
||||
R3(c, d, e, a, b, 53);
|
||||
R3(b, c, d, e, a, 54);
|
||||
R3(a, b, c, d, e, 55);
|
||||
R3(e, a, b, c, d, 56);
|
||||
R3(d, e, a, b, c, 57);
|
||||
R3(c, d, e, a, b, 58);
|
||||
R3(b, c, d, e, a, 59);
|
||||
R4(a, b, c, d, e, 60);
|
||||
R4(e, a, b, c, d, 61);
|
||||
R4(d, e, a, b, c, 62);
|
||||
R4(c, d, e, a, b, 63);
|
||||
R4(b, c, d, e, a, 64);
|
||||
R4(a, b, c, d, e, 65);
|
||||
R4(e, a, b, c, d, 66);
|
||||
R4(d, e, a, b, c, 67);
|
||||
R4(c, d, e, a, b, 68);
|
||||
R4(b, c, d, e, a, 69);
|
||||
R4(a, b, c, d, e, 70);
|
||||
R4(e, a, b, c, d, 71);
|
||||
R4(d, e, a, b, c, 72);
|
||||
R4(c, d, e, a, b, 73);
|
||||
R4(b, c, d, e, a, 74);
|
||||
R4(a, b, c, d, e, 75);
|
||||
R4(e, a, b, c, d, 76);
|
||||
R4(d, e, a, b, c, 77);
|
||||
R4(c, d, e, a, b, 78);
|
||||
R4(b, c, d, e, a, 79);
|
||||
/* Add the working vars back into context.state[] */
|
||||
state[0] += a;
|
||||
state[1] += b;
|
||||
state[2] += c;
|
||||
state[3] += d;
|
||||
state[4] += e;
|
||||
/* Wipe variables */
|
||||
a = b = c = d = e = 0;
|
||||
#ifdef SHA1HANDSOFF
|
||||
memset(block, '\0', sizeof(block));
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
/* SHA1Init - Initialize new context */
|
||||
|
||||
void SHA1Init(
|
||||
SHA1_CTX * context
|
||||
)
|
||||
{
|
||||
/* SHA1 initialization constants */
|
||||
context->state[0] = 0x67452301;
|
||||
context->state[1] = 0xEFCDAB89;
|
||||
context->state[2] = 0x98BADCFE;
|
||||
context->state[3] = 0x10325476;
|
||||
context->state[4] = 0xC3D2E1F0;
|
||||
context->count[0] = context->count[1] = 0;
|
||||
}
|
||||
|
||||
|
||||
/* Run your data through this. */
|
||||
|
||||
void SHA1Update(
|
||||
SHA1_CTX * context,
|
||||
const unsigned char *data,
|
||||
uint32_t len
|
||||
)
|
||||
{
|
||||
uint32_t i;
|
||||
|
||||
uint32_t j;
|
||||
|
||||
j = context->count[0];
|
||||
if ((context->count[0] += len << 3) < j)
|
||||
context->count[1]++;
|
||||
context->count[1] += (len >> 29);
|
||||
j = (j >> 3) & 63;
|
||||
if ((j + len) > 63)
|
||||
{
|
||||
memcpy(&context->buffer[j], data, (i = 64 - j));
|
||||
SHA1Transform(context->state, context->buffer);
|
||||
for (; i + 63 < len; i += 64)
|
||||
{
|
||||
SHA1Transform(context->state, &data[i]);
|
||||
}
|
||||
j = 0;
|
||||
}
|
||||
else
|
||||
i = 0;
|
||||
memcpy(&context->buffer[j], &data[i], len - i);
|
||||
}
|
||||
|
||||
|
||||
/* Add padding and return the message digest. */
|
||||
|
||||
void SHA1Final(
|
||||
unsigned char digest[20],
|
||||
SHA1_CTX * context
|
||||
)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
unsigned char finalcount[8];
|
||||
|
||||
unsigned char c;
|
||||
|
||||
#if 0 /* untested "improvement" by DHR */
|
||||
/* Convert context->count to a sequence of bytes
|
||||
* in finalcount. Second element first, but
|
||||
* big-endian order within element.
|
||||
* But we do it all backwards.
|
||||
*/
|
||||
unsigned char *fcp = &finalcount[8];
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
{
|
||||
uint32_t t = context->count[i];
|
||||
|
||||
int j;
|
||||
|
||||
for (j = 0; j < 4; t >>= 8, j++)
|
||||
*--fcp = (unsigned char) t}
|
||||
#else
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
finalcount[i] = (unsigned char) ((context->count[(i >= 4 ? 0 : 1)] >> ((3 - (i & 3)) * 8)) & 255); /* Endian independent */
|
||||
}
|
||||
#endif
|
||||
c = 0200;
|
||||
SHA1Update(context, &c, 1);
|
||||
while ((context->count[0] & 504) != 448)
|
||||
{
|
||||
c = 0000;
|
||||
SHA1Update(context, &c, 1);
|
||||
}
|
||||
SHA1Update(context, finalcount, 8); /* Should cause a SHA1Transform() */
|
||||
for (i = 0; i < 20; i++)
|
||||
{
|
||||
digest[i] = (unsigned char)
|
||||
((context->state[i >> 2] >> ((3 - (i & 3)) * 8)) & 255);
|
||||
}
|
||||
/* Wipe variables */
|
||||
memset(context, '\0', sizeof(*context));
|
||||
memset(&finalcount, '\0', sizeof(finalcount));
|
||||
}
|
||||
|
||||
void SHA1(
|
||||
char *hash_out,
|
||||
const char *str,
|
||||
uint32_t len)
|
||||
{
|
||||
SHA1_CTX ctx;
|
||||
unsigned int ii;
|
||||
|
||||
SHA1Init(&ctx);
|
||||
for (ii=0; ii<len; ii+=1)
|
||||
SHA1Update(&ctx, (const unsigned char*)str + ii, 1);
|
||||
SHA1Final((unsigned char *)hash_out, &ctx);
|
||||
}
|
||||
|
||||
52
examples/gguf-hash/deps/sha1/sha1.h
Normal file
52
examples/gguf-hash/deps/sha1/sha1.h
Normal file
@@ -0,0 +1,52 @@
|
||||
#ifndef SHA1_H
|
||||
#define SHA1_H
|
||||
|
||||
/*
|
||||
SHA-1 in C
|
||||
By Steve Reid <steve@edmweb.com>
|
||||
100% Public Domain
|
||||
*/
|
||||
|
||||
#include "stdint.h"
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t state[5];
|
||||
uint32_t count[2];
|
||||
unsigned char buffer[64];
|
||||
} SHA1_CTX;
|
||||
|
||||
void SHA1Transform(
|
||||
uint32_t state[5],
|
||||
const unsigned char buffer[64]
|
||||
);
|
||||
|
||||
void SHA1Init(
|
||||
SHA1_CTX * context
|
||||
);
|
||||
|
||||
void SHA1Update(
|
||||
SHA1_CTX * context,
|
||||
const unsigned char *data,
|
||||
uint32_t len
|
||||
);
|
||||
|
||||
void SHA1Final(
|
||||
unsigned char digest[20],
|
||||
SHA1_CTX * context
|
||||
);
|
||||
|
||||
void SHA1(
|
||||
char *hash_out,
|
||||
const char *str,
|
||||
uint32_t len);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* SHA1_H */
|
||||
15
examples/gguf-hash/deps/sha256/package.json
Normal file
15
examples/gguf-hash/deps/sha256/package.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"name": "sha256",
|
||||
"version": "0.0.2",
|
||||
"repo": "jb55/sha256.c",
|
||||
"description": "sha256 in c",
|
||||
"keywords": ["sha256", "sha2"],
|
||||
"src": ["sha256.c", "sha256.h"],
|
||||
"dependencies": {
|
||||
"jb55/rotate-bits.h": "0.1.1"
|
||||
},
|
||||
"development": {
|
||||
"thlorenz/tap.c": "*"
|
||||
}
|
||||
}
|
||||
|
||||
221
examples/gguf-hash/deps/sha256/sha256.c
Normal file
221
examples/gguf-hash/deps/sha256/sha256.c
Normal file
@@ -0,0 +1,221 @@
|
||||
/* Crypto/Sha256.c -- SHA-256 Hash
|
||||
2010-06-11 : Igor Pavlov : Public domain
|
||||
This code is based on public domain code from Wei Dai's Crypto++ library. */
|
||||
|
||||
#include "rotate-bits/rotate-bits.h"
|
||||
#include "sha256.h"
|
||||
|
||||
/* define it for speed optimization */
|
||||
#define _SHA256_UNROLL
|
||||
#define _SHA256_UNROLL2
|
||||
|
||||
void
|
||||
sha256_init(sha256_t *p)
|
||||
{
|
||||
p->state[0] = 0x6a09e667;
|
||||
p->state[1] = 0xbb67ae85;
|
||||
p->state[2] = 0x3c6ef372;
|
||||
p->state[3] = 0xa54ff53a;
|
||||
p->state[4] = 0x510e527f;
|
||||
p->state[5] = 0x9b05688c;
|
||||
p->state[6] = 0x1f83d9ab;
|
||||
p->state[7] = 0x5be0cd19;
|
||||
p->count = 0;
|
||||
}
|
||||
|
||||
#define S0(x) (ROTR32(x, 2) ^ ROTR32(x,13) ^ ROTR32(x, 22))
|
||||
#define S1(x) (ROTR32(x, 6) ^ ROTR32(x,11) ^ ROTR32(x, 25))
|
||||
#define s0(x) (ROTR32(x, 7) ^ ROTR32(x,18) ^ (x >> 3))
|
||||
#define s1(x) (ROTR32(x,17) ^ ROTR32(x,19) ^ (x >> 10))
|
||||
|
||||
#define blk0(i) (W[i] = data[i])
|
||||
#define blk2(i) (W[i&15] += s1(W[(i-2)&15]) + W[(i-7)&15] + s0(W[(i-15)&15]))
|
||||
|
||||
#define Ch(x,y,z) (z^(x&(y^z)))
|
||||
#define Maj(x,y,z) ((x&y)|(z&(x|y)))
|
||||
|
||||
#define a(i) T[(0-(i))&7]
|
||||
#define b(i) T[(1-(i))&7]
|
||||
#define c(i) T[(2-(i))&7]
|
||||
#define d(i) T[(3-(i))&7]
|
||||
#define e(i) T[(4-(i))&7]
|
||||
#define f(i) T[(5-(i))&7]
|
||||
#define g(i) T[(6-(i))&7]
|
||||
#define h(i) T[(7-(i))&7]
|
||||
|
||||
|
||||
#ifdef _SHA256_UNROLL2
|
||||
|
||||
#define R(a,b,c,d,e,f,g,h, i) h += S1(e) + Ch(e,f,g) + K[i+j] + (j?blk2(i):blk0(i));\
|
||||
d += h; h += S0(a) + Maj(a, b, c)
|
||||
|
||||
#define RX_8(i) \
|
||||
R(a,b,c,d,e,f,g,h, i); \
|
||||
R(h,a,b,c,d,e,f,g, (i+1)); \
|
||||
R(g,h,a,b,c,d,e,f, (i+2)); \
|
||||
R(f,g,h,a,b,c,d,e, (i+3)); \
|
||||
R(e,f,g,h,a,b,c,d, (i+4)); \
|
||||
R(d,e,f,g,h,a,b,c, (i+5)); \
|
||||
R(c,d,e,f,g,h,a,b, (i+6)); \
|
||||
R(b,c,d,e,f,g,h,a, (i+7))
|
||||
|
||||
#else
|
||||
|
||||
#define R(i) h(i) += S1(e(i)) + Ch(e(i),f(i),g(i)) + K[i+j] + (j?blk2(i):blk0(i));\
|
||||
d(i) += h(i); h(i) += S0(a(i)) + Maj(a(i), b(i), c(i))
|
||||
|
||||
#ifdef _SHA256_UNROLL
|
||||
|
||||
#define RX_8(i) R(i+0); R(i+1); R(i+2); R(i+3); R(i+4); R(i+5); R(i+6); R(i+7);
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
static const uint32_t K[64] = {
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
};
|
||||
|
||||
static void
|
||||
sha256_transform(uint32_t *state, const uint32_t *data)
|
||||
{
|
||||
uint32_t W[16] = {0};
|
||||
unsigned j;
|
||||
#ifdef _SHA256_UNROLL2
|
||||
uint32_t a,b,c,d,e,f,g,h;
|
||||
a = state[0];
|
||||
b = state[1];
|
||||
c = state[2];
|
||||
d = state[3];
|
||||
e = state[4];
|
||||
f = state[5];
|
||||
g = state[6];
|
||||
h = state[7];
|
||||
#else
|
||||
uint32_t T[8];
|
||||
for (j = 0; j < 8; j++)
|
||||
T[j] = state[j];
|
||||
#endif
|
||||
|
||||
for (j = 0; j < 64; j += 16)
|
||||
{
|
||||
#if defined(_SHA256_UNROLL) || defined(_SHA256_UNROLL2)
|
||||
RX_8(0); RX_8(8);
|
||||
#else
|
||||
unsigned i;
|
||||
for (i = 0; i < 16; i++) { R(i); }
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef _SHA256_UNROLL2
|
||||
state[0] += a;
|
||||
state[1] += b;
|
||||
state[2] += c;
|
||||
state[3] += d;
|
||||
state[4] += e;
|
||||
state[5] += f;
|
||||
state[6] += g;
|
||||
state[7] += h;
|
||||
#else
|
||||
for (j = 0; j < 8; j++)
|
||||
state[j] += T[j];
|
||||
#endif
|
||||
|
||||
/* Wipe variables */
|
||||
/* memset(W, 0, sizeof(W)); */
|
||||
/* memset(T, 0, sizeof(T)); */
|
||||
}
|
||||
|
||||
#undef S0
|
||||
#undef S1
|
||||
#undef s0
|
||||
#undef s1
|
||||
|
||||
static void
|
||||
sha256_write_byte_block(sha256_t *p)
|
||||
{
|
||||
uint32_t data32[16];
|
||||
unsigned i;
|
||||
for (i = 0; i < 16; i++)
|
||||
data32[i] =
|
||||
((uint32_t)(p->buffer[i * 4 ]) << 24) +
|
||||
((uint32_t)(p->buffer[i * 4 + 1]) << 16) +
|
||||
((uint32_t)(p->buffer[i * 4 + 2]) << 8) +
|
||||
((uint32_t)(p->buffer[i * 4 + 3]));
|
||||
sha256_transform(p->state, data32);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
sha256_hash(unsigned char *buf, const unsigned char *data, size_t size)
|
||||
{
|
||||
sha256_t hash;
|
||||
sha256_init(&hash);
|
||||
sha256_update(&hash, data, size);
|
||||
sha256_final(&hash, buf);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
sha256_update(sha256_t *p, const unsigned char *data, size_t size)
|
||||
{
|
||||
uint32_t curBufferPos = (uint32_t)p->count & 0x3F;
|
||||
while (size > 0)
|
||||
{
|
||||
p->buffer[curBufferPos++] = *data++;
|
||||
p->count++;
|
||||
size--;
|
||||
if (curBufferPos == 64)
|
||||
{
|
||||
curBufferPos = 0;
|
||||
sha256_write_byte_block(p);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
sha256_final(sha256_t *p, unsigned char *digest)
|
||||
{
|
||||
uint64_t lenInBits = (p->count << 3);
|
||||
uint32_t curBufferPos = (uint32_t)p->count & 0x3F;
|
||||
unsigned i;
|
||||
p->buffer[curBufferPos++] = 0x80;
|
||||
while (curBufferPos != (64 - 8))
|
||||
{
|
||||
curBufferPos &= 0x3F;
|
||||
if (curBufferPos == 0)
|
||||
sha256_write_byte_block(p);
|
||||
p->buffer[curBufferPos++] = 0;
|
||||
}
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
p->buffer[curBufferPos++] = (unsigned char)(lenInBits >> 56);
|
||||
lenInBits <<= 8;
|
||||
}
|
||||
sha256_write_byte_block(p);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
*digest++ = (unsigned char)(p->state[i] >> 24);
|
||||
*digest++ = (unsigned char)(p->state[i] >> 16);
|
||||
*digest++ = (unsigned char)(p->state[i] >> 8);
|
||||
*digest++ = (unsigned char)(p->state[i]);
|
||||
}
|
||||
sha256_init(p);
|
||||
}
|
||||
24
examples/gguf-hash/deps/sha256/sha256.h
Normal file
24
examples/gguf-hash/deps/sha256/sha256.h
Normal file
@@ -0,0 +1,24 @@
|
||||
/* Sha256.h -- SHA-256 Hash
|
||||
2010-06-11 : Igor Pavlov : Public domain */
|
||||
|
||||
#ifndef __CRYPTO_SHA256_H
|
||||
#define __CRYPTO_SHA256_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define SHA256_DIGEST_SIZE 32
|
||||
|
||||
typedef struct sha256_t
|
||||
{
|
||||
uint32_t state[8];
|
||||
uint64_t count;
|
||||
unsigned char buffer[64];
|
||||
} sha256_t;
|
||||
|
||||
void sha256_init(sha256_t *p);
|
||||
void sha256_update(sha256_t *p, const unsigned char *data, size_t size);
|
||||
void sha256_final(sha256_t *p, unsigned char *digest);
|
||||
void sha256_hash(unsigned char *buf, const unsigned char *data, size_t size);
|
||||
|
||||
#endif
|
||||
12
examples/gguf-hash/deps/xxhash/clib.json
Normal file
12
examples/gguf-hash/deps/xxhash/clib.json
Normal file
@@ -0,0 +1,12 @@
|
||||
{
|
||||
"name": "xxhash",
|
||||
"version": "0.8.2",
|
||||
"repo": "Cyan4973/xxhash",
|
||||
"description": "Extremely fast non-cryptographic hash algorithm",
|
||||
"keywords": ["xxhash", "hashing"],
|
||||
"license": "BSD-2-Clause",
|
||||
"src": [
|
||||
"xxhash.c",
|
||||
"xxhash.h"
|
||||
]
|
||||
}
|
||||
42
examples/gguf-hash/deps/xxhash/xxhash.c
Normal file
42
examples/gguf-hash/deps/xxhash/xxhash.c
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* xxHash - Extremely Fast Hash algorithm
|
||||
* Copyright (C) 2012-2023 Yann Collet
|
||||
*
|
||||
* BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
*
|
||||
* * Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* * Redistributions in binary form must reproduce the above
|
||||
* copyright notice, this list of conditions and the following disclaimer
|
||||
* in the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
* You can contact the author at:
|
||||
* - xxHash homepage: https://www.xxhash.com
|
||||
* - xxHash source repository: https://github.com/Cyan4973/xxHash
|
||||
*/
|
||||
|
||||
/*
|
||||
* xxhash.c instantiates functions defined in xxhash.h
|
||||
*/
|
||||
|
||||
#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
|
||||
#define XXH_IMPLEMENTATION /* access definitions */
|
||||
|
||||
#include "xxhash.h"
|
||||
7093
examples/gguf-hash/deps/xxhash/xxhash.h
Normal file
7093
examples/gguf-hash/deps/xxhash/xxhash.h
Normal file
File diff suppressed because it is too large
Load Diff
694
examples/gguf-hash/gguf-hash.cpp
Normal file
694
examples/gguf-hash/gguf-hash.cpp
Normal file
@@ -0,0 +1,694 @@
|
||||
#include "ggml.h"
|
||||
#include "gguf.h"
|
||||
|
||||
#include <cstdlib> /* abort() */
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "xxhash/xxhash.h"
|
||||
#include "sha1/sha1.h"
|
||||
#include "sha256/sha256.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
// uuid.uuid5(uuid.NAMESPACE_URL, 'en.wikipedia.org/wiki/Llama.cpp')
|
||||
#define UUID_NAMESPACE_LLAMA_CPP "ef001206-dadc-5f6d-a15f-3359e577d4e5"
|
||||
#define UUID_NAMESPACE_LLAMA_CPP_HEX 0xef, 0x00, 0x12, 0x06, 0xda, 0xdc, 0x5f, 0x6d, 0xa1, 0x5f, 0x33, 0x59, 0xe5, 0x77, 0xd4, 0xe5
|
||||
|
||||
|
||||
#define HASH_TYPE_SHA256_STR "sha256"
|
||||
#define HASH_TYPE_SHA1_STR "sha1"
|
||||
#define HASH_TYPE_XXH64_STR "xxh64"
|
||||
#define HASH_TYPE_UUID_STR "uuid"
|
||||
|
||||
|
||||
typedef enum {
|
||||
HASH_EXIT_SUCCESS = 0, // All hash has been generated or validated
|
||||
HASH_EXIT_FAILURE = 1, // Generic Failure
|
||||
HASH_EXIT_MISMATCH = 2, // Hash mismatched during validation
|
||||
HASH_EXIT_MANIFEST_MISSING_ENTRY = 3, // Hash attempted validation but missing entry in manifest
|
||||
HASH_EXIT_MANIFEST_UNKNOWN_HASH = 4, // Manifest is present, but we do not know any hash format within it
|
||||
HASH_EXIT_MANIFEST_FILE_ERROR = 5 // Manifest is either missing or not a known format
|
||||
} hash_exit_code_t;
|
||||
|
||||
|
||||
typedef enum {
|
||||
HASH_MANIFEST_NOT_FOUND,
|
||||
HASH_MANIFEST_MISMATCH,
|
||||
HASH_MANIFEST_OK,
|
||||
} hash_manifest_result_t;
|
||||
|
||||
|
||||
struct hash_params {
|
||||
std::string input;
|
||||
bool xxh64 = false;
|
||||
bool sha1 = false;
|
||||
bool sha256 = false;
|
||||
bool uuid = false;
|
||||
|
||||
bool no_layer = false;
|
||||
|
||||
bool manifest_is_usable = false;
|
||||
std::string manifest_file;
|
||||
};
|
||||
|
||||
struct manifest_check_params {
|
||||
bool xxh64 = false;
|
||||
bool sha1 = false;
|
||||
bool sha256 = false;
|
||||
bool uuid = false;
|
||||
};
|
||||
|
||||
static char const * hash_manifest_result_to_str(hash_manifest_result_t value) {
|
||||
switch (value) {
|
||||
case HASH_MANIFEST_NOT_FOUND: return "Not Found";
|
||||
case HASH_MANIFEST_MISMATCH: return "Mismatch";
|
||||
case HASH_MANIFEST_OK: return "Ok";
|
||||
}
|
||||
return "?";
|
||||
}
|
||||
|
||||
static char const * hash_exit_code_to_str(hash_exit_code_t value) {
|
||||
switch (value) {
|
||||
case HASH_EXIT_SUCCESS: return "Success";
|
||||
case HASH_EXIT_FAILURE: return "Failure";
|
||||
case HASH_EXIT_MISMATCH: return "Mismatch";
|
||||
case HASH_EXIT_MANIFEST_MISSING_ENTRY: return "Manifest Missing Entry";
|
||||
case HASH_EXIT_MANIFEST_UNKNOWN_HASH: return "Manifest Unknown Hash";
|
||||
case HASH_EXIT_MANIFEST_FILE_ERROR: return "Manifest File Error";
|
||||
}
|
||||
return "?";
|
||||
}
|
||||
|
||||
static void hash_print_usage(const char * executable) {
|
||||
const hash_params default_params;
|
||||
printf("\n");
|
||||
printf("usage: %s [options] GGUF_IN\n", executable);
|
||||
printf("\n");
|
||||
printf("Hash a GGUF file");
|
||||
printf("\n");
|
||||
printf("options:\n");
|
||||
printf(" -h, --help show this help message and exit\n");
|
||||
printf(" --xxh64 use xxh64 hash\n");
|
||||
printf(" --sha1 use sha1 hash\n");
|
||||
printf(" --sha256 use sha256 hash\n");
|
||||
printf(" --all use all hash\n");
|
||||
printf(" --no-layer exclude per layer hash\n");
|
||||
printf(" --uuid generate UUIDv5 ID\n");
|
||||
printf(" -c, --check <manifest> verify against a manifest\n");
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
static void hash_params_parse_ex(int argc, const char ** argv, hash_params & params) {
|
||||
std::string arg;
|
||||
bool invalid_param = false;
|
||||
const std::string arg_prefix = "--";
|
||||
|
||||
int arg_idx = 1;
|
||||
for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) {
|
||||
arg = argv[arg_idx];
|
||||
if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) {
|
||||
std::replace(arg.begin(), arg.end(), '_', '-');
|
||||
}
|
||||
|
||||
bool arg_found = false;
|
||||
if (arg == "-h" || arg == "--help") {
|
||||
hash_print_usage(argv[0]);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (arg == "--xxh64") {
|
||||
arg_found = true;
|
||||
params.xxh64 = true;
|
||||
}
|
||||
|
||||
if (arg == "--sha1") {
|
||||
arg_found = true;
|
||||
params.sha1 = true;
|
||||
}
|
||||
|
||||
if (arg == "--uuid") {
|
||||
arg_found = true;
|
||||
params.uuid = true;
|
||||
}
|
||||
|
||||
if (arg == "--sha256") {
|
||||
arg_found = true;
|
||||
params.sha256 = true;
|
||||
}
|
||||
|
||||
if (arg == "--all") {
|
||||
arg_found = true;
|
||||
params.sha256 = true;
|
||||
params.sha1 = true;
|
||||
params.xxh64 = true;
|
||||
}
|
||||
|
||||
if (arg == "--no-layer") {
|
||||
arg_found = true;
|
||||
params.no_layer = true;
|
||||
}
|
||||
|
||||
if (arg == "-c" || arg == "--check") {
|
||||
if (++arg_idx >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
arg_found = true;
|
||||
params.manifest_file = argv[arg_idx];
|
||||
}
|
||||
|
||||
if (!arg_found) {
|
||||
throw std::invalid_argument("error: unknown argument: " + arg);
|
||||
}
|
||||
}
|
||||
|
||||
if (invalid_param) {
|
||||
throw std::invalid_argument("error: invalid parameter for argument:" + arg);
|
||||
}
|
||||
|
||||
if (argc - arg_idx < 1) {
|
||||
throw std::invalid_argument("error: bad arguments");
|
||||
}
|
||||
|
||||
params.input = argv[arg_idx++];
|
||||
}
|
||||
|
||||
static bool hash_params_parse(int argc, const char ** argv, hash_params & params) {
|
||||
bool result = true;
|
||||
try {
|
||||
hash_params_parse_ex(argc, argv, params);
|
||||
}
|
||||
catch (const std::invalid_argument & ex) {
|
||||
fprintf(stderr, "%s\n", ex.what());
|
||||
hash_print_usage(argv[0]);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static bool manifest_type(const std::string & manifest_file, manifest_check_params & manifest_check) {
|
||||
if (manifest_file.empty()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::ifstream file(manifest_file);
|
||||
if (!file.is_open()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string manifest_entry_line;
|
||||
while (getline(file, manifest_entry_line)) {
|
||||
// hash_type_str hash_str tensor_name
|
||||
// e.g. 'xxh64 f66e9cd66a4396a0 test.gguf:tensor_0'
|
||||
std::istringstream line_stream(manifest_entry_line);
|
||||
std::string file_hash_type;
|
||||
if (line_stream >> file_hash_type) {
|
||||
if (file_hash_type == HASH_TYPE_SHA256_STR) {
|
||||
manifest_check.sha256 = true;
|
||||
} else if (file_hash_type == HASH_TYPE_SHA1_STR) {
|
||||
manifest_check.sha1 = true;
|
||||
} else if (file_hash_type == HASH_TYPE_XXH64_STR) {
|
||||
manifest_check.xxh64 = true;
|
||||
} else if (file_hash_type == HASH_TYPE_UUID_STR) {
|
||||
manifest_check.uuid = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static hash_manifest_result_t manifest_verify(const std::string& manifest_file, const std::string& hash_type_str, const std::string& hash_str, const std::string& tensor_name) {
|
||||
if (manifest_file.empty()) {
|
||||
return HASH_MANIFEST_NOT_FOUND;
|
||||
}
|
||||
|
||||
std::ifstream file(manifest_file);
|
||||
if (!file.is_open()) {
|
||||
return HASH_MANIFEST_NOT_FOUND;
|
||||
}
|
||||
|
||||
std::string manifest_entry_line;
|
||||
while (getline(file, manifest_entry_line)) {
|
||||
std::istringstream line_stream(manifest_entry_line);
|
||||
std::string file_hash_type;
|
||||
std::string file_hash;
|
||||
std::string file_tensor_name;
|
||||
if (line_stream >> file_hash_type >> file_hash >> file_tensor_name) {
|
||||
// Line parsed. Check hash validity
|
||||
|
||||
if (file_hash_type != hash_type_str) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (file_tensor_name != tensor_name) {
|
||||
continue;
|
||||
}
|
||||
|
||||
return (file_hash == hash_str) ? HASH_MANIFEST_OK : HASH_MANIFEST_MISMATCH;
|
||||
}
|
||||
}
|
||||
|
||||
return HASH_MANIFEST_NOT_FOUND;
|
||||
}
|
||||
|
||||
static void generate_uuidv5(const unsigned char sha1_digest[20], unsigned char uuid[16]) {
|
||||
// Ref: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.5
|
||||
// Assumes that digest was processed correctly with the expected namespace
|
||||
for (int i = 0; i < 16; i++) {
|
||||
uuid[i] = sha1_digest[i];
|
||||
}
|
||||
|
||||
// Set bits corresponding to UUID ver 5
|
||||
uuid[ 6] &= ~(0xF << 4);
|
||||
uuid[ 6] |= (5 << 4);
|
||||
|
||||
// Set bits corresponding to UUID variant 0b10XX
|
||||
uuid[ 8] &= ~(0xc << 4);
|
||||
uuid[ 8] |= (0x8 << 4);
|
||||
}
|
||||
|
||||
static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
|
||||
const std::string & fname = hash_params.input;
|
||||
struct ggml_context * ctx_data = NULL;
|
||||
|
||||
struct gguf_init_params params = {
|
||||
/*.no_alloc = */ false,
|
||||
/*.ctx = */ &ctx_data,
|
||||
};
|
||||
|
||||
// xxh64 init
|
||||
XXH64_state_t* xxh64_model_hash_state = NULL;
|
||||
if (hash_params.xxh64) {
|
||||
xxh64_model_hash_state = XXH64_createState();
|
||||
if (xxh64_model_hash_state==NULL) {
|
||||
abort();
|
||||
}
|
||||
|
||||
XXH64_hash_t const seed = 0;
|
||||
if (XXH64_reset(xxh64_model_hash_state, seed) == XXH_ERROR) {
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
// sha1 init
|
||||
SHA1_CTX sha1_model_hash_ctx;
|
||||
if (hash_params.sha1) {
|
||||
SHA1Init(&sha1_model_hash_ctx);
|
||||
}
|
||||
|
||||
// sha256 init
|
||||
sha256_t sha256_model_hash_ctx;
|
||||
if (hash_params.sha256) {
|
||||
sha256_init(&sha256_model_hash_ctx);
|
||||
}
|
||||
|
||||
// sha1 for uuid init
|
||||
SHA1_CTX sha1_for_uuid_ctx;
|
||||
if (hash_params.uuid) {
|
||||
unsigned char const uuidv5_namespace[] = {UUID_NAMESPACE_LLAMA_CPP_HEX};
|
||||
SHA1Init(&sha1_for_uuid_ctx);
|
||||
SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)uuidv5_namespace, sizeof(uuidv5_namespace));
|
||||
}
|
||||
|
||||
struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
|
||||
const int n_tensors = gguf_get_n_tensors(ctx);
|
||||
bool tensor_layer_in_manifest = false;
|
||||
bool model_in_manifest = false;
|
||||
bool tensor_layer_has_mismatch = false;
|
||||
bool model_has_mismatch = false;
|
||||
for (int i = 0; i < n_tensors; ++i) {
|
||||
const char * name = gguf_get_tensor_name(ctx, i);
|
||||
struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
|
||||
auto n_bytes = ggml_nbytes(cur);
|
||||
auto *raw_data = cur->data;
|
||||
const std::string tensor_layer_name = fname + ":" + name;
|
||||
|
||||
if (hash_params.xxh64) {
|
||||
|
||||
if (!hash_params.no_layer) {
|
||||
// Per Layer Hash
|
||||
XXH64_hash_t hash = XXH64(raw_data, n_bytes, 0);
|
||||
|
||||
char hex_result[17];
|
||||
for (int offset = 0; offset < 8; offset++) {
|
||||
unsigned int shift_bits_by = (8 * (8 - offset - 1));
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
tensor_layer_in_manifest = true;
|
||||
tensor_layer_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
tensor_layer_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_XXH64_STR, hex_result, tensor_layer_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Overall Model Hash
|
||||
if (XXH64_update(xxh64_model_hash_state, raw_data, n_bytes) == XXH_ERROR) abort();
|
||||
}
|
||||
|
||||
if (hash_params.sha1) {
|
||||
|
||||
if (!hash_params.no_layer) {
|
||||
// Per Layer Hash
|
||||
char result[21]; // sha1 outputs 20 bytes
|
||||
SHA1( result, (const char *)raw_data, n_bytes);
|
||||
|
||||
char hex_result[41] = {0};
|
||||
for (int offset = 0; offset < 20; offset++) {
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
tensor_layer_in_manifest = true;
|
||||
tensor_layer_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
tensor_layer_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA1_STR, hex_result, tensor_layer_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Overall Model Hash
|
||||
SHA1Update( &sha1_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
|
||||
}
|
||||
|
||||
if (hash_params.sha256) {
|
||||
|
||||
if (!hash_params.no_layer) {
|
||||
// Per Layer Hash
|
||||
unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes
|
||||
sha256_hash((unsigned char*) result, (const unsigned char *)raw_data, n_bytes);
|
||||
|
||||
char hex_result[SHA256_DIGEST_SIZE * 2 + 1] = {0};
|
||||
for (int offset = 0; offset < SHA256_DIGEST_SIZE; offset++) {
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
tensor_layer_in_manifest = true;
|
||||
tensor_layer_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
tensor_layer_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA256_STR, hex_result, tensor_layer_name.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
// Overall Model Hash
|
||||
sha256_update( &sha256_model_hash_ctx, (unsigned char const *)raw_data, n_bytes);
|
||||
}
|
||||
|
||||
if (hash_params.uuid) {
|
||||
SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)raw_data, n_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.xxh64) {
|
||||
XXH64_hash_t const hash = XXH64_digest(xxh64_model_hash_state);
|
||||
|
||||
char hex_result[17];
|
||||
for (int offset = 0; offset < 8; offset++) {
|
||||
unsigned int shift_bits_by = (8 * (8 - offset - 1));
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", (unsigned char) (hash >> shift_bits_by)&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_XXH64_STR, hex_result, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_XXH64_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_XXH64_STR, hex_result, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.sha1) {
|
||||
unsigned char result[21];
|
||||
SHA1Final(result, &sha1_model_hash_ctx);
|
||||
|
||||
char hex_result[41];
|
||||
for (int offset = 0; offset < 20; offset++) {
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA1_STR, hex_result, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA1_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA1_STR, hex_result, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.sha256) {
|
||||
unsigned char result[SHA256_DIGEST_SIZE]; // sha256 outputs 32 bytes
|
||||
sha256_final( &sha256_model_hash_ctx, result);
|
||||
|
||||
char hex_result[SHA256_DIGEST_SIZE * 2 + 1] = {0};
|
||||
for (int offset = 0; offset < SHA256_DIGEST_SIZE; offset++) {
|
||||
snprintf( ( hex_result + (2*offset)), sizeof(hex_result) - (2*offset), "%02x", result[offset]&0xff);
|
||||
}
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, hex_result, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_SHA256_STR, hex_result, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_SHA256_STR, hex_result, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
if (hash_params.uuid) {
|
||||
unsigned char result[21];
|
||||
SHA1Final(result, &sha1_for_uuid_ctx);
|
||||
|
||||
unsigned char uuid[16];
|
||||
generate_uuidv5(result, uuid);
|
||||
|
||||
char string_buffer[37] = {0};
|
||||
snprintf(string_buffer, sizeof(string_buffer), "%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
|
||||
uuid[0], uuid[1], uuid[2], uuid[3],
|
||||
uuid[4], uuid[5], uuid[6], uuid[7],
|
||||
uuid[8], uuid[9], uuid[10], uuid[11],
|
||||
uuid[12], uuid[13], uuid[14], uuid[15]);
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
hash_manifest_result_t verify_result = manifest_verify(hash_params.manifest_file, HASH_TYPE_SHA256_STR, string_buffer, fname);
|
||||
|
||||
switch (verify_result) {
|
||||
case HASH_MANIFEST_NOT_FOUND:
|
||||
break;
|
||||
case HASH_MANIFEST_MISMATCH:
|
||||
model_in_manifest = true;
|
||||
model_has_mismatch = true;
|
||||
break;
|
||||
case HASH_MANIFEST_OK:
|
||||
model_in_manifest = true;
|
||||
break;
|
||||
}
|
||||
|
||||
printf("%-8s %-s %s - %s\n", HASH_TYPE_UUID_STR, string_buffer, fname.c_str(), hash_manifest_result_to_str(verify_result));
|
||||
} else {
|
||||
printf("%-8s %-s %s\n", HASH_TYPE_UUID_STR, string_buffer, fname.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ggml_free(ctx_data);
|
||||
gguf_free(ctx);
|
||||
|
||||
|
||||
if (hash_params.manifest_is_usable) {
|
||||
// In hash verification mode
|
||||
|
||||
if (!model_in_manifest) {
|
||||
// model missing in manifest?
|
||||
|
||||
// Check tensor layer...
|
||||
if (!tensor_layer_in_manifest) {
|
||||
// Still missing? Maybe we are reading the wrong manifest.
|
||||
return HASH_EXIT_MANIFEST_MISSING_ENTRY;
|
||||
}
|
||||
|
||||
if (tensor_layer_has_mismatch) {
|
||||
// Per tensor check found error
|
||||
return HASH_EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// All per tensor layer checks passed? Sounds good enough.
|
||||
return HASH_EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
// Overall model check passed, but let's check per layer just in case
|
||||
// If missing, we don't care too much as the overall model checked
|
||||
if (tensor_layer_in_manifest && tensor_layer_has_mismatch) {
|
||||
return HASH_EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (model_has_mismatch) {
|
||||
// model has failed hash somewhere in the model
|
||||
return HASH_EXIT_FAILURE;
|
||||
}
|
||||
|
||||
// All checks appears to be fine
|
||||
return HASH_EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
// In hash generation mode
|
||||
return HASH_EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
int main(int argc, const char ** argv) {
|
||||
hash_params params;
|
||||
manifest_check_params manifest_check;
|
||||
hash_params_parse(argc, argv, params);
|
||||
|
||||
if (!params.manifest_file.empty()) {
|
||||
if (!manifest_type(params.manifest_file, manifest_check)) {
|
||||
printf("ERROR cannot open manifest %s", params.manifest_file.c_str());
|
||||
return HASH_EXIT_MANIFEST_FILE_ERROR;
|
||||
}
|
||||
|
||||
if (!manifest_check.sha256 && !manifest_check.sha1 && !manifest_check.xxh64 && !manifest_check.uuid) {
|
||||
printf("ERROR manifest does not have any known hash format in %s", params.manifest_file.c_str());
|
||||
return HASH_EXIT_MANIFEST_UNKNOWN_HASH;
|
||||
}
|
||||
|
||||
printf("manifest %s", params.manifest_file.c_str());
|
||||
|
||||
if (manifest_check.sha256) {
|
||||
printf(" sha256");
|
||||
}
|
||||
|
||||
if (manifest_check.sha1) {
|
||||
printf(" sha1");
|
||||
}
|
||||
|
||||
if (manifest_check.xxh64) {
|
||||
printf(" xxh64");
|
||||
}
|
||||
|
||||
if (manifest_check.uuid) {
|
||||
printf(" uuid");
|
||||
}
|
||||
|
||||
printf("\n");
|
||||
|
||||
// Autoselect the highest security hash if manifest is provided but
|
||||
// the user has not specifically defined the hash they care about
|
||||
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
|
||||
// User has not selected a specific value, pick most secure hash
|
||||
if (manifest_check.sha256) {
|
||||
params.sha256 = true;
|
||||
} else if (manifest_check.sha1) {
|
||||
params.sha1 = true;
|
||||
} else if (manifest_check.xxh64) {
|
||||
params.xxh64 = true;
|
||||
} else if (manifest_check.uuid) {
|
||||
params.uuid = true;
|
||||
}
|
||||
}
|
||||
|
||||
params.manifest_is_usable = true;
|
||||
}
|
||||
|
||||
// By default if no swich argument provided, assume xxh64
|
||||
if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
|
||||
params.xxh64 = true;
|
||||
}
|
||||
|
||||
hash_exit_code_t exit_code = gguf_hash(params);
|
||||
|
||||
if (params.manifest_is_usable) {
|
||||
printf("\nVerification results for %s - %s\n", params.manifest_file.c_str(), hash_exit_code_to_str(exit_code));
|
||||
}
|
||||
|
||||
return exit_code;
|
||||
}
|
||||
Reference in New Issue
Block a user