Generate Perplexity, KLD, ARC, HellaSwag, MMLU, Truthful QA and WinoGrande scores

This commit is contained in:
Ed Addario
2025-07-01 08:18:25 +01:00
parent 3a6bd3ae0d
commit 664228d4d7
77 changed files with 1770 additions and 0 deletions

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from ./Dolphin-Mistral-24B-Venice-Edition-F16.gguf (version GGUF V3 (latest))
Final result: 70.8000 +/- 1.6614
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 40685.08 ms
llama_perf_context_print: prompt eval time = 151474.18 ms / 36666 tokens ( 4.13 ms per token, 242.06 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 152313.64 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from ./Dolphin-Mistral-24B-Venice-Edition-F16.gguf (version GGUF V3 (latest))
750 84.53333333% [81.7702%, 86.9445%]
llama_perf_context_print: load time = 2898.86 ms
llama_perf_context_print: prompt eval time = 545800.60 ms / 129319 tokens ( 4.22 ms per token, 236.93 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 549439.05 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from ./Dolphin-Mistral-24B-Venice-Edition-F16.gguf (version GGUF V3 (latest))
Final result: 45.3333 +/- 1.8190
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 2912.06 ms
llama_perf_context_print: prompt eval time = 289195.06 ms / 68956 tokens ( 4.19 ms per token, 238.44 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 290438.95 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from ./Dolphin-Mistral-24B-Venice-Edition-F16.gguf (version GGUF V3 (latest))
Final result: 38.1333 +/- 1.7748
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 2866.02 ms
llama_perf_context_print: prompt eval time = 221192.17 ms / 51053 tokens ( 4.33 ms per token, 230.81 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 222713.65 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from ./Dolphin-Mistral-24B-Venice-Edition-F16.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 80.2667 +/- 1.4542
llama_perf_context_print: load time = 2915.27 ms
llama_perf_context_print: prompt eval time = 93765.90 ms / 22541 tokens ( 4.16 ms per token, 240.40 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 94258.84 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_M.gguf (version GGUF V3 (latest))
Final result: 65.6000 +/- 1.7358
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 4337.33 ms
llama_perf_context_print: prompt eval time = 154185.52 ms / 36666 tokens ( 4.21 ms per token, 237.80 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 155064.54 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_M.gguf (version GGUF V3 (latest))
750 79.60000000% [76.5686%, 82.3297%]
llama_perf_context_print: load time = 748.01 ms
llama_perf_context_print: prompt eval time = 545228.12 ms / 129319 tokens ( 4.22 ms per token, 237.18 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 549120.81 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_M.gguf (version GGUF V3 (latest))
Final result: 42.9333 +/- 1.8086
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 733.03 ms
llama_perf_context_print: prompt eval time = 286965.35 ms / 68956 tokens ( 4.16 ms per token, 240.29 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 288291.52 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 20.379006 ± 0.160275
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 73.93%
Mean ln(PPL(Q)/PPL(base)) : 1.193094 ± 0.005360
Mean PPL(Q)/PPL(base) : 3.297266 ± 0.017673
Mean PPL(Q)-PPL(base) : 14.198428 ± 0.132841
====== KL divergence statistics ======
Mean KLD: 1.290608 ± 0.004304
Maximum KLD: 27.217335
99.9% KLD: 13.970652
99.0% KLD: 8.700209
99.0% KLD: 8.700209
Median KLD: 0.800781
10.0% KLD: 0.078073
5.0% KLD: 0.028142
1.0% KLD: 0.004895
Minimum KLD: 0.000072
====== Token probability statistics ======
Mean Δp: -18.226 ± 0.085 %
Maximum Δp: 95.351%
99.9% Δp: 73.378%
99.0% Δp: 48.957%
95.0% Δp: 22.742%
90.0% Δp: 9.547%
75.0% Δp: 0.003%
Median Δp: -5.000%
25.0% Δp: -33.511%
10.0% Δp: -75.041%
5.0% Δp: -91.958%
1.0% Δp: -99.782%
0.1% Δp: -99.968%
Minimum Δp: -99.998%
RMS Δp : 37.928 ± 0.088 %
Same top p: 62.059 ± 0.125 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_M.gguf (version GGUF V3 (latest))
Final result: 38.4000 +/- 1.7771
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 775.72 ms
llama_perf_context_print: prompt eval time = 219416.23 ms / 51053 tokens ( 4.30 ms per token, 232.68 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 221003.17 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_M.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 72.4000 +/- 1.6334
llama_perf_context_print: load time = 754.95 ms
llama_perf_context_print: prompt eval time = 95407.56 ms / 22541 tokens ( 4.23 ms per token, 236.26 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 95958.45 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_S.gguf (version GGUF V3 (latest))
Final result: 64.9333 +/- 1.7436
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 4191.63 ms
llama_perf_context_print: prompt eval time = 154546.03 ms / 36666 tokens ( 4.21 ms per token, 237.25 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 155368.54 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_S.gguf (version GGUF V3 (latest))
750 79.86666667% [76.8479%, 82.5810%]
llama_perf_context_print: load time = 746.08 ms
llama_perf_context_print: prompt eval time = 544955.95 ms / 129319 tokens ( 4.21 ms per token, 237.30 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 548715.91 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_S.gguf (version GGUF V3 (latest))
Final result: 42.0000 +/- 1.8034
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 771.21 ms
llama_perf_context_print: prompt eval time = 299726.08 ms / 68956 tokens ( 4.35 ms per token, 230.06 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 301016.69 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 21.165413 ± 0.164512
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 73.80%
Mean ln(PPL(Q)/PPL(base)) : 1.230957 ± 0.005322
Mean PPL(Q)/PPL(base) : 3.424504 ± 0.018226
Mean PPL(Q)-PPL(base) : 14.984836 ± 0.137053
====== KL divergence statistics ======
Mean KLD: 1.340446 ± 0.004301
Maximum KLD: 26.031479
99.9% KLD: 13.794025
99.0% KLD: 8.598367
99.0% KLD: 8.598367
Median KLD: 0.843369
10.0% KLD: 0.087419
5.0% KLD: 0.032046
1.0% KLD: 0.005851
Minimum KLD: 0.000204
====== Token probability statistics ======
Mean Δp: -19.454 ± 0.086 %
Maximum Δp: 95.317%
99.9% Δp: 71.779%
99.0% Δp: 46.392%
95.0% Δp: 20.287%
90.0% Δp: 7.866%
75.0% Δp: -0.004%
Median Δp: -5.901%
25.0% Δp: -35.629%
10.0% Δp: -76.898%
5.0% Δp: -92.591%
1.0% Δp: -99.784%
0.1% Δp: -99.968%
Minimum Δp: -99.998%
RMS Δp : 38.586 ± 0.088 %
Same top p: 61.705 ± 0.125 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_S.gguf (version GGUF V3 (latest))
Final result: 38.0000 +/- 1.7736
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 775.02 ms
llama_perf_context_print: prompt eval time = 229109.68 ms / 51053 tokens ( 4.49 ms per token, 222.83 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 230721.17 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_S.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 72.5333 +/- 1.6309
llama_perf_context_print: load time = 766.80 ms
llama_perf_context_print: prompt eval time = 99506.70 ms / 22541 tokens ( 4.41 ms per token, 226.53 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 99996.10 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ4_NL.gguf (version GGUF V3 (latest))
Final result: 68.4000 +/- 1.6988
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 5388.44 ms
llama_perf_context_print: prompt eval time = 152847.24 ms / 36666 tokens ( 4.17 ms per token, 239.89 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 153655.01 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ4_NL.gguf (version GGUF V3 (latest))
750 80.66666667% [77.6870%, 83.3338%]
llama_perf_context_print: load time = 881.76 ms
llama_perf_context_print: prompt eval time = 557982.84 ms / 129319 tokens ( 4.31 ms per token, 231.76 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 561719.88 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ4_NL.gguf (version GGUF V3 (latest))
Final result: 44.9333 +/- 1.8176
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 922.07 ms
llama_perf_context_print: prompt eval time = 295625.34 ms / 68956 tokens ( 4.29 ms per token, 233.25 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 296873.16 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 18.783744 ± 0.146959
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 74.79%
Mean ln(PPL(Q)/PPL(base)) : 1.111580 ± 0.005253
Mean PPL(Q)/PPL(base) : 3.039157 ± 0.015966
Mean PPL(Q)-PPL(base) : 12.603167 ± 0.119417
====== KL divergence statistics ======
Mean KLD: 1.199318 ± 0.004258
Maximum KLD: 26.543749
99.9% KLD: 14.340773
99.0% KLD: 8.742259
99.0% KLD: 8.742259
Median KLD: 0.715601
10.0% KLD: 0.071172
5.0% KLD: 0.025864
1.0% KLD: 0.004589
Minimum KLD: 0.000142
====== Token probability statistics ======
Mean Δp: -17.171 ± 0.083 %
Maximum Δp: 92.904%
99.9% Δp: 72.496%
99.0% Δp: 48.216%
95.0% Δp: 22.569%
90.0% Δp: 10.000%
75.0% Δp: 0.011%
Median Δp: -4.438%
25.0% Δp: -30.853%
10.0% Δp: -71.739%
5.0% Δp: -90.613%
1.0% Δp: -99.767%
0.1% Δp: -99.967%
Minimum Δp: -99.998%
RMS Δp : 36.745 ± 0.088 %
Same top p: 64.223 ± 0.123 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ4_NL.gguf (version GGUF V3 (latest))
Final result: 38.1333 +/- 1.7748
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 920.15 ms
llama_perf_context_print: prompt eval time = 224775.45 ms / 51053 tokens ( 4.40 ms per token, 227.13 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 226389.03 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ4_NL.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 74.4000 +/- 1.5947
llama_perf_context_print: load time = 863.03 ms
llama_perf_context_print: prompt eval time = 98796.90 ms / 22541 tokens ( 4.38 ms per token, 228.15 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 99295.86 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_L.gguf (version GGUF V3 (latest))
Final result: 67.2000 +/- 1.7155
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 4789.37 ms
llama_perf_context_print: prompt eval time = 171359.80 ms / 36666 tokens ( 4.67 ms per token, 213.97 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 172182.70 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_L.gguf (version GGUF V3 (latest))
750 80.26666667% [77.2672%, 82.9576%]
llama_perf_context_print: load time = 819.35 ms
llama_perf_context_print: prompt eval time = 605874.35 ms / 129319 tokens ( 4.69 ms per token, 213.44 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 609526.15 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_L.gguf (version GGUF V3 (latest))
Final result: 43.2000 +/- 1.8100
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 873.17 ms
llama_perf_context_print: prompt eval time = 318870.48 ms / 68956 tokens ( 4.62 ms per token, 216.25 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 320164.35 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 19.313300 ± 0.150799
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 74.61%
Mean ln(PPL(Q)/PPL(base)) : 1.139382 ± 0.005262
Mean PPL(Q)/PPL(base) : 3.124838 ± 0.016443
Mean PPL(Q)-PPL(base) : 13.132723 ± 0.123247
====== KL divergence statistics ======
Mean KLD: 1.248712 ± 0.004216
Maximum KLD: 28.765745
99.9% KLD: 13.682988
99.0% KLD: 8.611128
99.0% KLD: 8.611128
Median KLD: 0.769048
10.0% KLD: 0.075574
5.0% KLD: 0.027425
1.0% KLD: 0.004777
Minimum KLD: 0.000121
====== Token probability statistics ======
Mean Δp: -17.672 ± 0.084 %
Maximum Δp: 94.089%
99.9% Δp: 73.751%
99.0% Δp: 50.009%
95.0% Δp: 22.814%
90.0% Δp: 9.454%
75.0% Δp: 0.004%
Median Δp: -4.889%
25.0% Δp: -32.188%
10.0% Δp: -72.799%
5.0% Δp: -91.232%
1.0% Δp: -99.761%
0.1% Δp: -99.966%
Minimum Δp: -99.998%
RMS Δp : 37.260 ± 0.088 %
Same top p: 62.749 ± 0.124 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_L.gguf (version GGUF V3 (latest))
Final result: 39.6000 +/- 1.7870
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 858.54 ms
llama_perf_context_print: prompt eval time = 243423.07 ms / 51053 tokens ( 4.77 ms per token, 209.73 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 245001.45 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_L.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 72.9333 +/- 1.6235
llama_perf_context_print: load time = 903.93 ms
llama_perf_context_print: prompt eval time = 106212.48 ms / 22541 tokens ( 4.71 ms per token, 212.23 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 106738.43 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_M.gguf (version GGUF V3 (latest))
Final result: 66.6667 +/- 1.7225
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 4267.81 ms
llama_perf_context_print: prompt eval time = 165656.48 ms / 36666 tokens ( 4.52 ms per token, 221.34 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 166474.66 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_M.gguf (version GGUF V3 (latest))
750 80.66666667% [77.6870%, 83.3338%]
llama_perf_context_print: load time = 756.25 ms
llama_perf_context_print: prompt eval time = 584702.34 ms / 129319 tokens ( 4.52 ms per token, 221.17 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 588407.02 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_M.gguf (version GGUF V3 (latest))
Final result: 43.8667 +/- 1.8132
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 809.88 ms
llama_perf_context_print: prompt eval time = 300457.60 ms / 68956 tokens ( 4.36 ms per token, 229.50 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 301704.70 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 18.723777 ± 0.145380
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 75.90%
Mean ln(PPL(Q)/PPL(base)) : 1.108382 ± 0.005110
Mean PPL(Q)/PPL(base) : 3.029454 ± 0.015481
Mean PPL(Q)-PPL(base) : 12.543199 ± 0.117315
====== KL divergence statistics ======
Mean KLD: 1.226150 ± 0.004006
Maximum KLD: 27.303829
99.9% KLD: 13.319038
99.0% KLD: 8.045850
99.0% KLD: 8.045850
Median KLD: 0.778573
10.0% KLD: 0.072866
5.0% KLD: 0.026539
1.0% KLD: 0.004645
Minimum KLD: 0.000149
====== Token probability statistics ======
Mean Δp: -17.217 ± 0.084 %
Maximum Δp: 93.510%
99.9% Δp: 74.449%
99.0% Δp: 50.740%
95.0% Δp: 23.161%
90.0% Δp: 9.707%
75.0% Δp: 0.007%
Median Δp: -4.675%
25.0% Δp: -31.413%
10.0% Δp: -71.201%
5.0% Δp: -90.111%
1.0% Δp: -99.699%
0.1% Δp: -99.959%
Minimum Δp: -99.998%
RMS Δp : 36.807 ± 0.087 %
Same top p: 63.002 ± 0.124 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_M.gguf (version GGUF V3 (latest))
Final result: 39.4667 +/- 1.7860
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 794.64 ms
llama_perf_context_print: prompt eval time = 226374.98 ms / 51053 tokens ( 4.43 ms per token, 225.52 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 228005.49 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_M.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 72.2667 +/- 1.6358
llama_perf_context_print: load time = 786.85 ms
llama_perf_context_print: prompt eval time = 98570.95 ms / 22541 tokens ( 4.37 ms per token, 228.68 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 99099.17 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_S.gguf (version GGUF V3 (latest))
Final result: 66.2667 +/- 1.7276
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 4024.90 ms
llama_perf_context_print: prompt eval time = 162864.58 ms / 36666 tokens ( 4.44 ms per token, 225.13 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 163701.55 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_S.gguf (version GGUF V3 (latest))
750 78.93333333% [75.8712%, 81.7006%]
llama_perf_context_print: load time = 717.95 ms
llama_perf_context_print: prompt eval time = 573377.50 ms / 129319 tokens ( 4.43 ms per token, 225.54 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 577152.30 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_S.gguf (version GGUF V3 (latest))
Final result: 43.7333 +/- 1.8126
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 729.81 ms
llama_perf_context_print: prompt eval time = 302270.16 ms / 68956 tokens ( 4.38 ms per token, 228.13 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 303540.55 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 19.765437 ± 0.153182
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 74.13%
Mean ln(PPL(Q)/PPL(base)) : 1.162523 ± 0.005278
Mean PPL(Q)/PPL(base) : 3.197992 ± 0.016878
Mean PPL(Q)-PPL(base) : 13.584860 ± 0.125811
====== KL divergence statistics ======
Mean KLD: 1.295119 ± 0.004177
Maximum KLD: 27.306818
99.9% KLD: 13.228414
99.0% KLD: 8.401047
99.0% KLD: 8.401047
Median KLD: 0.824191
10.0% KLD: 0.081751
5.0% KLD: 0.029987
1.0% KLD: 0.004980
Minimum KLD: 0.000157
====== Token probability statistics ======
Mean Δp: -18.417 ± 0.085 %
Maximum Δp: 92.683%
99.9% Δp: 74.856%
99.0% Δp: 50.844%
95.0% Δp: 22.899%
90.0% Δp: 9.183%
75.0% Δp: 0.001%
Median Δp: -5.493%
25.0% Δp: -34.119%
10.0% Δp: -74.211%
5.0% Δp: -91.714%
1.0% Δp: -99.759%
0.1% Δp: -99.964%
Minimum Δp: -99.995%
RMS Δp : 38.004 ± 0.087 %
Same top p: 61.136 ± 0.125 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_S.gguf (version GGUF V3 (latest))
Final result: 38.1333 +/- 1.7748
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 713.93 ms
llama_perf_context_print: prompt eval time = 230959.05 ms / 51053 tokens ( 4.52 ms per token, 221.05 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 232565.83 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_S.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 72.8000 +/- 1.6260
llama_perf_context_print: load time = 732.76 ms
llama_perf_context_print: prompt eval time = 100522.70 ms / 22541 tokens ( 4.46 ms per token, 224.24 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 101025.93 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_M.gguf (version GGUF V3 (latest))
Final result: 68.0000 +/- 1.7045
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 5558.39 ms
llama_perf_context_print: prompt eval time = 161414.80 ms / 36666 tokens ( 4.40 ms per token, 227.15 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 162273.65 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_M.gguf (version GGUF V3 (latest))
750 80.93333333% [77.9671%, 83.5843%]
llama_perf_context_print: load time = 911.43 ms
llama_perf_context_print: prompt eval time = 570839.24 ms / 129319 tokens ( 4.41 ms per token, 226.54 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 574520.13 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_M.gguf (version GGUF V3 (latest))
Final result: 45.2000 +/- 1.8185
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 974.87 ms
llama_perf_context_print: prompt eval time = 300441.31 ms / 68956 tokens ( 4.36 ms per token, 229.52 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 301727.28 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 18.556910 ± 0.145472
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 74.92%
Mean ln(PPL(Q)/PPL(base)) : 1.099431 ± 0.005248
Mean PPL(Q)/PPL(base) : 3.002456 ± 0.015757
Mean PPL(Q)-PPL(base) : 12.376333 ± 0.117900
====== KL divergence statistics ======
Mean KLD: 1.187728 ± 0.004237
Maximum KLD: 27.586824
99.9% KLD: 14.366529
99.0% KLD: 8.795478
99.0% KLD: 8.795478
Median KLD: 0.711806
10.0% KLD: 0.068614
5.0% KLD: 0.024978
1.0% KLD: 0.004366
Minimum KLD: 0.000131
====== Token probability statistics ======
Mean Δp: -16.803 ± 0.083 %
Maximum Δp: 94.144%
99.9% Δp: 72.562%
99.0% Δp: 49.033%
95.0% Δp: 23.292%
90.0% Δp: 10.354%
75.0% Δp: 0.021%
Median Δp: -4.202%
25.0% Δp: -30.211%
10.0% Δp: -70.964%
5.0% Δp: -90.473%
1.0% Δp: -99.772%
0.1% Δp: -99.967%
Minimum Δp: -99.998%
RMS Δp : 36.521 ± 0.088 %
Same top p: 63.986 ± 0.123 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_M.gguf (version GGUF V3 (latest))
Final result: 36.6667 +/- 1.7608
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 953.61 ms
llama_perf_context_print: prompt eval time = 229781.03 ms / 51053 tokens ( 4.50 ms per token, 222.18 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 231385.66 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_M.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 72.1333 +/- 1.6382
llama_perf_context_print: load time = 981.15 ms
llama_perf_context_print: prompt eval time = 100004.38 ms / 22541 tokens ( 4.44 ms per token, 225.40 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 100554.46 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_S.gguf (version GGUF V3 (latest))
Final result: 67.0667 +/- 1.7172
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 5272.61 ms
llama_perf_context_print: prompt eval time = 161319.58 ms / 36666 tokens ( 4.40 ms per token, 227.29 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 162146.27 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_S.gguf (version GGUF V3 (latest))
750 81.06666667% [78.1072%, 83.7095%]
llama_perf_context_print: load time = 868.43 ms
llama_perf_context_print: prompt eval time = 569329.51 ms / 129319 tokens ( 4.40 ms per token, 227.14 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 573001.51 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_S.gguf (version GGUF V3 (latest))
Final result: 45.2000 +/- 1.8185
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 885.80 ms
llama_perf_context_print: prompt eval time = 300092.31 ms / 68956 tokens ( 4.35 ms per token, 229.78 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 301434.44 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 18.663517 ± 0.146425
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 74.87%
Mean ln(PPL(Q)/PPL(base)) : 1.105159 ± 0.005257
Mean PPL(Q)/PPL(base) : 3.019704 ± 0.015873
Mean PPL(Q)-PPL(base) : 12.482940 ± 0.118853
====== KL divergence statistics ======
Mean KLD: 1.192878 ± 0.004250
Maximum KLD: 27.191839
99.9% KLD: 14.356927
99.0% KLD: 8.760485
99.0% KLD: 8.760485
Median KLD: 0.713994
10.0% KLD: 0.069566
5.0% KLD: 0.025085
1.0% KLD: 0.004382
Minimum KLD: 0.000130
====== Token probability statistics ======
Mean Δp: -16.887 ± 0.083 %
Maximum Δp: 94.834%
99.9% Δp: 72.359%
99.0% Δp: 48.972%
95.0% Δp: 23.124%
90.0% Δp: 10.343%
75.0% Δp: 0.019%
Median Δp: -4.257%
25.0% Δp: -30.401%
10.0% Δp: -71.279%
5.0% Δp: -90.560%
1.0% Δp: -99.771%
0.1% Δp: -99.966%
Minimum Δp: -99.998%
RMS Δp : 36.598 ± 0.088 %
Same top p: 63.979 ± 0.123 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_S.gguf (version GGUF V3 (latest))
Final result: 36.2667 +/- 1.7567
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 868.82 ms
llama_perf_context_print: prompt eval time = 229327.93 ms / 51053 tokens ( 4.49 ms per token, 222.62 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 230969.11 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_S.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 72.0000 +/- 1.6406
llama_perf_context_print: load time = 885.47 ms
llama_perf_context_print: prompt eval time = 99462.32 ms / 22541 tokens ( 4.41 ms per token, 226.63 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 99973.97 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_M.gguf (version GGUF V3 (latest))
Final result: 67.0667 +/- 1.7172
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 6034.32 ms
llama_perf_context_print: prompt eval time = 171299.05 ms / 36666 tokens ( 4.67 ms per token, 214.05 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 172122.97 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_M.gguf (version GGUF V3 (latest))
750 81.73333333% [78.8086%, 84.3346%]
llama_perf_context_print: load time = 1034.44 ms
llama_perf_context_print: prompt eval time = 604834.04 ms / 129319 tokens ( 4.68 ms per token, 213.81 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 608474.71 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_M.gguf (version GGUF V3 (latest))
Final result: 44.5333 +/- 1.8160
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 1042.58 ms
llama_perf_context_print: prompt eval time = 317286.78 ms / 68956 tokens ( 4.60 ms per token, 217.33 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 318545.57 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 18.174846 ± 0.142320
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 75.14%
Mean ln(PPL(Q)/PPL(base)) : 1.078627 ± 0.005222
Mean PPL(Q)/PPL(base) : 2.940639 ± 0.015356
Mean PPL(Q)-PPL(base) : 11.994269 ± 0.114726
====== KL divergence statistics ======
Mean KLD: 1.159685 ± 0.004238
Maximum KLD: 28.100733
99.9% KLD: 14.541190
99.0% KLD: 8.790474
99.0% KLD: 8.790474
Median KLD: 0.682733
10.0% KLD: 0.066346
5.0% KLD: 0.024022
1.0% KLD: 0.004240
Minimum KLD: 0.000159
====== Token probability statistics ======
Mean Δp: -16.552 ± 0.083 %
Maximum Δp: 94.307%
99.9% Δp: 72.111%
99.0% Δp: 47.813%
95.0% Δp: 23.079%
90.0% Δp: 10.497%
75.0% Δp: 0.030%
Median Δp: -4.029%
25.0% Δp: -29.474%
10.0% Δp: -70.364%
5.0% Δp: -90.072%
1.0% Δp: -99.761%
0.1% Δp: -99.966%
Minimum Δp: -99.998%
RMS Δp : 36.214 ± 0.088 %
Same top p: 64.455 ± 0.123 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_M.gguf (version GGUF V3 (latest))
Final result: 37.8667 +/- 1.7724
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 1018.74 ms
llama_perf_context_print: prompt eval time = 243028.85 ms / 51053 tokens ( 4.76 ms per token, 210.07 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 244621.86 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_M.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 73.8667 +/- 1.6054
llama_perf_context_print: load time = 1080.02 ms
llama_perf_context_print: prompt eval time = 105347.75 ms / 22541 tokens ( 4.67 ms per token, 213.97 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 105836.33 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_S.gguf (version GGUF V3 (latest))
Final result: 67.3333 +/- 1.7137
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 5824.40 ms
llama_perf_context_print: prompt eval time = 172340.28 ms / 36666 tokens ( 4.70 ms per token, 212.75 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 173198.86 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_S.gguf (version GGUF V3 (latest))
750 81.46666667% [78.5279%, 84.0847%]
llama_perf_context_print: load time = 1049.43 ms
llama_perf_context_print: prompt eval time = 592449.23 ms / 129319 tokens ( 4.58 ms per token, 218.28 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 596100.86 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_S.gguf (version GGUF V3 (latest))
Final result: 44.2667 +/- 1.8149
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 1030.41 ms
llama_perf_context_print: prompt eval time = 307801.57 ms / 68956 tokens ( 4.46 ms per token, 224.03 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 309115.30 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 18.199918 ± 0.142513
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 75.20%
Mean ln(PPL(Q)/PPL(base)) : 1.080005 ± 0.005216
Mean PPL(Q)/PPL(base) : 2.944695 ± 0.015358
Mean PPL(Q)-PPL(base) : 12.019340 ± 0.114880
====== KL divergence statistics ======
Mean KLD: 1.160040 ± 0.004229
Maximum KLD: 27.444889
99.9% KLD: 14.604442
99.0% KLD: 8.763094
99.0% KLD: 8.763094
Median KLD: 0.682655
10.0% KLD: 0.066186
5.0% KLD: 0.023837
1.0% KLD: 0.004326
Minimum KLD: 0.000148
====== Token probability statistics ======
Mean Δp: -16.565 ± 0.083 %
Maximum Δp: 94.238%
99.9% Δp: 72.114%
99.0% Δp: 47.597%
95.0% Δp: 23.064%
90.0% Δp: 10.544%
75.0% Δp: 0.030%
Median Δp: -4.020%
25.0% Δp: -29.497%
10.0% Δp: -70.424%
5.0% Δp: -90.053%
1.0% Δp: -99.759%
0.1% Δp: -99.965%
Minimum Δp: -99.998%
RMS Δp : 36.220 ± 0.088 %
Same top p: 64.431 ± 0.123 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_S.gguf (version GGUF V3 (latest))
Final result: 38.6667 +/- 1.7794
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 984.39 ms
llama_perf_context_print: prompt eval time = 235496.75 ms / 51053 tokens ( 4.61 ms per token, 216.79 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 237131.13 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_S.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 74.2667 +/- 1.5974
llama_perf_context_print: load time = 1051.46 ms
llama_perf_context_print: prompt eval time = 102348.25 ms / 22541 tokens ( 4.54 ms per token, 220.24 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 102869.23 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q6_K.gguf (version GGUF V3 (latest))
Final result: 67.4667 +/- 1.7119
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 7664.20 ms
llama_perf_context_print: prompt eval time = 175407.42 ms / 36666 tokens ( 4.78 ms per token, 209.03 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 176202.75 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q6_K.gguf (version GGUF V3 (latest))
750 81.06666667% [78.1072%, 83.7095%]
llama_perf_context_print: load time = 1225.60 ms
llama_perf_context_print: prompt eval time = 622423.40 ms / 129319 tokens ( 4.81 ms per token, 207.77 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 626130.15 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q6_K.gguf (version GGUF V3 (latest))
Final result: 44.5333 +/- 1.8160
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 1255.72 ms
llama_perf_context_print: prompt eval time = 327772.92 ms / 68956 tokens ( 4.75 ms per token, 210.38 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 329047.99 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 18.213825 ± 0.142965
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 75.05%
Mean ln(PPL(Q)/PPL(base)) : 1.080769 ± 0.005241
Mean PPL(Q)/PPL(base) : 2.946946 ± 0.015446
Mean PPL(Q)-PPL(base) : 12.033248 ± 0.115399
====== KL divergence statistics ======
Mean KLD: 1.158026 ± 0.004262
Maximum KLD: 26.265640
99.9% KLD: 14.634221
99.0% KLD: 8.878881
99.0% KLD: 8.878881
Median KLD: 0.679157
10.0% KLD: 0.065587
5.0% KLD: 0.023885
1.0% KLD: 0.004187
Minimum KLD: 0.000139
====== Token probability statistics ======
Mean Δp: -16.509 ± 0.083 %
Maximum Δp: 94.239%
99.9% Δp: 72.141%
99.0% Δp: 48.282%
95.0% Δp: 23.325%
90.0% Δp: 10.605%
75.0% Δp: 0.032%
Median Δp: -4.000%
25.0% Δp: -29.434%
10.0% Δp: -70.325%
5.0% Δp: -90.061%
1.0% Δp: -99.771%
0.1% Δp: -99.967%
Minimum Δp: -99.998%
RMS Δp : 36.219 ± 0.088 %
Same top p: 64.447 ± 0.123 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q6_K.gguf (version GGUF V3 (latest))
Final result: 39.6000 +/- 1.7870
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 1251.53 ms
llama_perf_context_print: prompt eval time = 250368.11 ms / 51053 tokens ( 4.90 ms per token, 203.91 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 251979.66 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q6_K.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 73.8667 +/- 1.6054
llama_perf_context_print: load time = 1250.90 ms
llama_perf_context_print: prompt eval time = 108113.49 ms / 22541 tokens ( 4.80 ms per token, 208.49 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 108625.99 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q8_0.gguf (version GGUF V3 (latest))
Final result: 68.1333 +/- 1.7026
Random chance: 25.0083 +/- 1.5824
llama_perf_context_print: load time = 10486.40 ms
llama_perf_context_print: prompt eval time = 161243.77 ms / 36666 tokens ( 4.40 ms per token, 227.39 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 162055.73 ms / 36667 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,20 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q8_0.gguf (version GGUF V3 (latest))
750 81.33333333% [78.3876%, 83.9597%]
llama_perf_context_print: load time = 1550.41 ms
llama_perf_context_print: prompt eval time = 553902.33 ms / 129319 tokens ( 4.28 ms per token, 233.47 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 557360.27 ms / 129320 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q8_0.gguf (version GGUF V3 (latest))
Final result: 44.9333 +/- 1.8176
Random chance: 25.0000 +/- 1.5822
llama_perf_context_print: load time = 1526.16 ms
llama_perf_context_print: prompt eval time = 290805.14 ms / 68956 tokens ( 4.22 ms per token, 237.12 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 292006.82 ms / 68957 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,37 @@
====== Perplexity statistics ======
Mean PPL(Q) : 18.203515 ± 0.142826
Mean PPL(base) : 6.180577 ± 0.041038
Cor(ln(PPL(Q)), ln(PPL(base))): 75.02%
Mean ln(PPL(Q)/PPL(base)) : 1.080203 ± 0.005242
Mean PPL(Q)/PPL(base) : 2.945277 ± 0.015439
Mean PPL(Q)-PPL(base) : 12.022938 ± 0.115276
====== KL divergence statistics ======
Mean KLD: 1.158351 ± 0.004265
Maximum KLD: 27.082415
99.9% KLD: 14.510898
99.0% KLD: 8.873251
99.0% KLD: 8.873251
Median KLD: 0.678687
10.0% KLD: 0.065954
5.0% KLD: 0.024006
1.0% KLD: 0.004161
Minimum KLD: 0.000141
====== Token probability statistics ======
Mean Δp: -16.515 ± 0.083 %
Maximum Δp: 94.585%
99.9% Δp: 72.074%
99.0% Δp: 48.296%
95.0% Δp: 23.169%
90.0% Δp: 10.662%
75.0% Δp: 0.031%
Median Δp: -4.006%
25.0% Δp: -29.412%
10.0% Δp: -70.362%
5.0% Δp: -90.143%
1.0% Δp: -99.767%
0.1% Δp: -99.966%
Minimum Δp: -99.998%
RMS Δp : 36.227 ± 0.088 %
Same top p: 64.562 ± 0.123 %

View File

@@ -0,0 +1,21 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q8_0.gguf (version GGUF V3 (latest))
Final result: 38.2667 +/- 1.7759
Random chance: 19.8992 +/- 1.4588
llama_perf_context_print: load time = 1498.38 ms
llama_perf_context_print: prompt eval time = 227802.62 ms / 51053 tokens ( 4.46 ms per token, 224.11 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 229374.00 ms / 51054 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)

View File

@@ -0,0 +1,19 @@
build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0
llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free
llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q8_0.gguf (version GGUF V3 (latest))
Final Winogrande score(750 tasks): 74.4000 +/- 1.5947
llama_perf_context_print: load time = 1463.29 ms
llama_perf_context_print: prompt eval time = 100036.10 ms / 22541 tokens ( 4.44 ms per token, 225.33 tokens per second)
llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_perf_context_print: total time = 100546.29 ms / 22542 tokens
ggml_metal_free: deallocating
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)
ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)