diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-F16.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-F16.arc new file mode 100644 index 0000000..268ddaa --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-F16.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from ./Dolphin-Mistral-24B-Venice-Edition-F16.gguf (version GGUF V3 (latest)) + +Final result: 70.8000 +/- 1.6614 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 40685.08 ms +llama_perf_context_print: prompt eval time = 151474.18 ms / 36666 tokens ( 4.13 ms per token, 242.06 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 152313.64 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-F16.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-F16.hsw new file mode 100644 index 0000000..d0127cb --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-F16.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from ./Dolphin-Mistral-24B-Venice-Edition-F16.gguf (version GGUF V3 (latest)) + +750 84.53333333% [81.7702%, 86.9445%] + + +llama_perf_context_print: load time = 2898.86 ms +llama_perf_context_print: prompt eval time = 545800.60 ms / 129319 tokens ( 4.22 ms per token, 236.93 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 549439.05 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-F16.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-F16.mmlu new file mode 100644 index 0000000..4688e84 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-F16.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from ./Dolphin-Mistral-24B-Venice-Edition-F16.gguf (version GGUF V3 (latest)) + +Final result: 45.3333 +/- 1.8190 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 2912.06 ms +llama_perf_context_print: prompt eval time = 289195.06 ms / 68956 tokens ( 4.19 ms per token, 238.44 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 290438.95 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-F16.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-F16.tqa new file mode 100644 index 0000000..f9a678b --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-F16.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from ./Dolphin-Mistral-24B-Venice-Edition-F16.gguf (version GGUF V3 (latest)) + +Final result: 38.1333 +/- 1.7748 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 2866.02 ms +llama_perf_context_print: prompt eval time = 221192.17 ms / 51053 tokens ( 4.33 ms per token, 230.81 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 222713.65 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-F16.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-F16.wng new file mode 100644 index 0000000..8db37f8 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-F16.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from ./Dolphin-Mistral-24B-Venice-Edition-F16.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 80.2667 +/- 1.4542 + +llama_perf_context_print: load time = 2915.27 ms +llama_perf_context_print: prompt eval time = 93765.90 ms / 22541 tokens ( 4.16 ms per token, 240.40 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 94258.84 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.arc new file mode 100644 index 0000000..826aafd --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_M.gguf (version GGUF V3 (latest)) + +Final result: 65.6000 +/- 1.7358 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 4337.33 ms +llama_perf_context_print: prompt eval time = 154185.52 ms / 36666 tokens ( 4.21 ms per token, 237.80 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 155064.54 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.hsw new file mode 100644 index 0000000..5daf40c --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_M.gguf (version GGUF V3 (latest)) + +750 79.60000000% [76.5686%, 82.3297%] + + +llama_perf_context_print: load time = 748.01 ms +llama_perf_context_print: prompt eval time = 545228.12 ms / 129319 tokens ( 4.22 ms per token, 237.18 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 549120.81 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.mmlu new file mode 100644 index 0000000..53203f3 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_M.gguf (version GGUF V3 (latest)) + +Final result: 42.9333 +/- 1.8086 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 733.03 ms +llama_perf_context_print: prompt eval time = 286965.35 ms / 68956 tokens ( 4.16 ms per token, 240.29 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 288291.52 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.ppx new file mode 100644 index 0000000..bcbd5ef --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 20.379006 ± 0.160275 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 73.93% +Mean ln(PPL(Q)/PPL(base)) : 1.193094 ± 0.005360 +Mean PPL(Q)/PPL(base) : 3.297266 ± 0.017673 +Mean PPL(Q)-PPL(base) : 14.198428 ± 0.132841 + +====== KL divergence statistics ====== +Mean KLD: 1.290608 ± 0.004304 +Maximum KLD: 27.217335 +99.9% KLD: 13.970652 +99.0% KLD: 8.700209 +99.0% KLD: 8.700209 +Median KLD: 0.800781 +10.0% KLD: 0.078073 + 5.0% KLD: 0.028142 + 1.0% KLD: 0.004895 +Minimum KLD: 0.000072 + +====== Token probability statistics ====== +Mean Δp: -18.226 ± 0.085 % +Maximum Δp: 95.351% +99.9% Δp: 73.378% +99.0% Δp: 48.957% +95.0% Δp: 22.742% +90.0% Δp: 9.547% +75.0% Δp: 0.003% +Median Δp: -5.000% +25.0% Δp: -33.511% +10.0% Δp: -75.041% + 5.0% Δp: -91.958% + 1.0% Δp: -99.782% + 0.1% Δp: -99.968% +Minimum Δp: -99.998% +RMS Δp : 37.928 ± 0.088 % +Same top p: 62.059 ± 0.125 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.tqa new file mode 100644 index 0000000..9d48966 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_M.gguf (version GGUF V3 (latest)) + +Final result: 38.4000 +/- 1.7771 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 775.72 ms +llama_perf_context_print: prompt eval time = 219416.23 ms / 51053 tokens ( 4.30 ms per token, 232.68 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 221003.17 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.wng new file mode 100644 index 0000000..a517f3d --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_m.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_M.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.4000 +/- 1.6334 + +llama_perf_context_print: load time = 754.95 ms +llama_perf_context_print: prompt eval time = 95407.56 ms / 22541 tokens ( 4.23 ms per token, 236.26 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 95958.45 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.arc new file mode 100644 index 0000000..1f555b7 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_S.gguf (version GGUF V3 (latest)) + +Final result: 64.9333 +/- 1.7436 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 4191.63 ms +llama_perf_context_print: prompt eval time = 154546.03 ms / 36666 tokens ( 4.21 ms per token, 237.25 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 155368.54 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.hsw new file mode 100644 index 0000000..c4e96d3 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_S.gguf (version GGUF V3 (latest)) + +750 79.86666667% [76.8479%, 82.5810%] + + +llama_perf_context_print: load time = 746.08 ms +llama_perf_context_print: prompt eval time = 544955.95 ms / 129319 tokens ( 4.21 ms per token, 237.30 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 548715.91 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.mmlu new file mode 100644 index 0000000..1961f50 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_S.gguf (version GGUF V3 (latest)) + +Final result: 42.0000 +/- 1.8034 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 771.21 ms +llama_perf_context_print: prompt eval time = 299726.08 ms / 68956 tokens ( 4.35 ms per token, 230.06 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 301016.69 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.ppx new file mode 100644 index 0000000..4d41684 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 21.165413 ± 0.164512 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 73.80% +Mean ln(PPL(Q)/PPL(base)) : 1.230957 ± 0.005322 +Mean PPL(Q)/PPL(base) : 3.424504 ± 0.018226 +Mean PPL(Q)-PPL(base) : 14.984836 ± 0.137053 + +====== KL divergence statistics ====== +Mean KLD: 1.340446 ± 0.004301 +Maximum KLD: 26.031479 +99.9% KLD: 13.794025 +99.0% KLD: 8.598367 +99.0% KLD: 8.598367 +Median KLD: 0.843369 +10.0% KLD: 0.087419 + 5.0% KLD: 0.032046 + 1.0% KLD: 0.005851 +Minimum KLD: 0.000204 + +====== Token probability statistics ====== +Mean Δp: -19.454 ± 0.086 % +Maximum Δp: 95.317% +99.9% Δp: 71.779% +99.0% Δp: 46.392% +95.0% Δp: 20.287% +90.0% Δp: 7.866% +75.0% Δp: -0.004% +Median Δp: -5.901% +25.0% Δp: -35.629% +10.0% Δp: -76.898% + 5.0% Δp: -92.591% + 1.0% Δp: -99.784% + 0.1% Δp: -99.968% +Minimum Δp: -99.998% +RMS Δp : 38.586 ± 0.088 % +Same top p: 61.705 ± 0.125 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.tqa new file mode 100644 index 0000000..63c4efc --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_S.gguf (version GGUF V3 (latest)) + +Final result: 38.0000 +/- 1.7736 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 775.02 ms +llama_perf_context_print: prompt eval time = 229109.68 ms / 51053 tokens ( 4.49 ms per token, 222.83 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 230721.17 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.wng new file mode 100644 index 0000000..dc0b2e8 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq3_s.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ3_S.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.5333 +/- 1.6309 + +llama_perf_context_print: load time = 766.80 ms +llama_perf_context_print: prompt eval time = 99506.70 ms / 22541 tokens ( 4.41 ms per token, 226.53 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 99996.10 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.arc new file mode 100644 index 0000000..b1bc84b --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ4_NL.gguf (version GGUF V3 (latest)) + +Final result: 68.4000 +/- 1.6988 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 5388.44 ms +llama_perf_context_print: prompt eval time = 152847.24 ms / 36666 tokens ( 4.17 ms per token, 239.89 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 153655.01 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.hsw new file mode 100644 index 0000000..6e1a491 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ4_NL.gguf (version GGUF V3 (latest)) + +750 80.66666667% [77.6870%, 83.3338%] + + +llama_perf_context_print: load time = 881.76 ms +llama_perf_context_print: prompt eval time = 557982.84 ms / 129319 tokens ( 4.31 ms per token, 231.76 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 561719.88 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.mmlu new file mode 100644 index 0000000..37775bb --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ4_NL.gguf (version GGUF V3 (latest)) + +Final result: 44.9333 +/- 1.8176 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 922.07 ms +llama_perf_context_print: prompt eval time = 295625.34 ms / 68956 tokens ( 4.29 ms per token, 233.25 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 296873.16 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.ppx new file mode 100644 index 0000000..41853c5 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 18.783744 ± 0.146959 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 74.79% +Mean ln(PPL(Q)/PPL(base)) : 1.111580 ± 0.005253 +Mean PPL(Q)/PPL(base) : 3.039157 ± 0.015966 +Mean PPL(Q)-PPL(base) : 12.603167 ± 0.119417 + +====== KL divergence statistics ====== +Mean KLD: 1.199318 ± 0.004258 +Maximum KLD: 26.543749 +99.9% KLD: 14.340773 +99.0% KLD: 8.742259 +99.0% KLD: 8.742259 +Median KLD: 0.715601 +10.0% KLD: 0.071172 + 5.0% KLD: 0.025864 + 1.0% KLD: 0.004589 +Minimum KLD: 0.000142 + +====== Token probability statistics ====== +Mean Δp: -17.171 ± 0.083 % +Maximum Δp: 92.904% +99.9% Δp: 72.496% +99.0% Δp: 48.216% +95.0% Δp: 22.569% +90.0% Δp: 10.000% +75.0% Δp: 0.011% +Median Δp: -4.438% +25.0% Δp: -30.853% +10.0% Δp: -71.739% + 5.0% Δp: -90.613% + 1.0% Δp: -99.767% + 0.1% Δp: -99.967% +Minimum Δp: -99.998% +RMS Δp : 36.745 ± 0.088 % +Same top p: 64.223 ± 0.123 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.tqa new file mode 100644 index 0000000..5bd4e11 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ4_NL.gguf (version GGUF V3 (latest)) + +Final result: 38.1333 +/- 1.7748 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 920.15 ms +llama_perf_context_print: prompt eval time = 224775.45 ms / 51053 tokens ( 4.40 ms per token, 227.13 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 226389.03 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.wng new file mode 100644 index 0000000..c8cf658 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-iq4_nl.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-IQ4_NL.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 74.4000 +/- 1.5947 + +llama_perf_context_print: load time = 863.03 ms +llama_perf_context_print: prompt eval time = 98796.90 ms / 22541 tokens ( 4.38 ms per token, 228.15 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 99295.86 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.arc new file mode 100644 index 0000000..6c5126d --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_L.gguf (version GGUF V3 (latest)) + +Final result: 67.2000 +/- 1.7155 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 4789.37 ms +llama_perf_context_print: prompt eval time = 171359.80 ms / 36666 tokens ( 4.67 ms per token, 213.97 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 172182.70 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.hsw new file mode 100644 index 0000000..390830a --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_L.gguf (version GGUF V3 (latest)) + +750 80.26666667% [77.2672%, 82.9576%] + + +llama_perf_context_print: load time = 819.35 ms +llama_perf_context_print: prompt eval time = 605874.35 ms / 129319 tokens ( 4.69 ms per token, 213.44 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 609526.15 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.mmlu new file mode 100644 index 0000000..92869b1 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_L.gguf (version GGUF V3 (latest)) + +Final result: 43.2000 +/- 1.8100 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 873.17 ms +llama_perf_context_print: prompt eval time = 318870.48 ms / 68956 tokens ( 4.62 ms per token, 216.25 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 320164.35 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.ppx new file mode 100644 index 0000000..f36ab2c --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 19.313300 ± 0.150799 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 74.61% +Mean ln(PPL(Q)/PPL(base)) : 1.139382 ± 0.005262 +Mean PPL(Q)/PPL(base) : 3.124838 ± 0.016443 +Mean PPL(Q)-PPL(base) : 13.132723 ± 0.123247 + +====== KL divergence statistics ====== +Mean KLD: 1.248712 ± 0.004216 +Maximum KLD: 28.765745 +99.9% KLD: 13.682988 +99.0% KLD: 8.611128 +99.0% KLD: 8.611128 +Median KLD: 0.769048 +10.0% KLD: 0.075574 + 5.0% KLD: 0.027425 + 1.0% KLD: 0.004777 +Minimum KLD: 0.000121 + +====== Token probability statistics ====== +Mean Δp: -17.672 ± 0.084 % +Maximum Δp: 94.089% +99.9% Δp: 73.751% +99.0% Δp: 50.009% +95.0% Δp: 22.814% +90.0% Δp: 9.454% +75.0% Δp: 0.004% +Median Δp: -4.889% +25.0% Δp: -32.188% +10.0% Δp: -72.799% + 5.0% Δp: -91.232% + 1.0% Δp: -99.761% + 0.1% Δp: -99.966% +Minimum Δp: -99.998% +RMS Δp : 37.260 ± 0.088 % +Same top p: 62.749 ± 0.124 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.tqa new file mode 100644 index 0000000..a587f2c --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_L.gguf (version GGUF V3 (latest)) + +Final result: 39.6000 +/- 1.7870 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 858.54 ms +llama_perf_context_print: prompt eval time = 243423.07 ms / 51053 tokens ( 4.77 ms per token, 209.73 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 245001.45 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.wng new file mode 100644 index 0000000..a0e9485 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_l.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_L.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.9333 +/- 1.6235 + +llama_perf_context_print: load time = 903.93 ms +llama_perf_context_print: prompt eval time = 106212.48 ms / 22541 tokens ( 4.71 ms per token, 212.23 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 106738.43 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.arc new file mode 100644 index 0000000..8b507f7 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_M.gguf (version GGUF V3 (latest)) + +Final result: 66.6667 +/- 1.7225 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 4267.81 ms +llama_perf_context_print: prompt eval time = 165656.48 ms / 36666 tokens ( 4.52 ms per token, 221.34 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 166474.66 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.hsw new file mode 100644 index 0000000..9e2b780 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_M.gguf (version GGUF V3 (latest)) + +750 80.66666667% [77.6870%, 83.3338%] + + +llama_perf_context_print: load time = 756.25 ms +llama_perf_context_print: prompt eval time = 584702.34 ms / 129319 tokens ( 4.52 ms per token, 221.17 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 588407.02 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.mmlu new file mode 100644 index 0000000..9aba027 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_M.gguf (version GGUF V3 (latest)) + +Final result: 43.8667 +/- 1.8132 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 809.88 ms +llama_perf_context_print: prompt eval time = 300457.60 ms / 68956 tokens ( 4.36 ms per token, 229.50 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 301704.70 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.ppx new file mode 100644 index 0000000..81c61bb --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 18.723777 ± 0.145380 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 75.90% +Mean ln(PPL(Q)/PPL(base)) : 1.108382 ± 0.005110 +Mean PPL(Q)/PPL(base) : 3.029454 ± 0.015481 +Mean PPL(Q)-PPL(base) : 12.543199 ± 0.117315 + +====== KL divergence statistics ====== +Mean KLD: 1.226150 ± 0.004006 +Maximum KLD: 27.303829 +99.9% KLD: 13.319038 +99.0% KLD: 8.045850 +99.0% KLD: 8.045850 +Median KLD: 0.778573 +10.0% KLD: 0.072866 + 5.0% KLD: 0.026539 + 1.0% KLD: 0.004645 +Minimum KLD: 0.000149 + +====== Token probability statistics ====== +Mean Δp: -17.217 ± 0.084 % +Maximum Δp: 93.510% +99.9% Δp: 74.449% +99.0% Δp: 50.740% +95.0% Δp: 23.161% +90.0% Δp: 9.707% +75.0% Δp: 0.007% +Median Δp: -4.675% +25.0% Δp: -31.413% +10.0% Δp: -71.201% + 5.0% Δp: -90.111% + 1.0% Δp: -99.699% + 0.1% Δp: -99.959% +Minimum Δp: -99.998% +RMS Δp : 36.807 ± 0.087 % +Same top p: 63.002 ± 0.124 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.tqa new file mode 100644 index 0000000..65c50e5 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_M.gguf (version GGUF V3 (latest)) + +Final result: 39.4667 +/- 1.7860 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 794.64 ms +llama_perf_context_print: prompt eval time = 226374.98 ms / 51053 tokens ( 4.43 ms per token, 225.52 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 228005.49 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.wng new file mode 100644 index 0000000..1f3acb8 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_m.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_M.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.2667 +/- 1.6358 + +llama_perf_context_print: load time = 786.85 ms +llama_perf_context_print: prompt eval time = 98570.95 ms / 22541 tokens ( 4.37 ms per token, 228.68 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 99099.17 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.arc new file mode 100644 index 0000000..e4bf211 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_S.gguf (version GGUF V3 (latest)) + +Final result: 66.2667 +/- 1.7276 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 4024.90 ms +llama_perf_context_print: prompt eval time = 162864.58 ms / 36666 tokens ( 4.44 ms per token, 225.13 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 163701.55 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.hsw new file mode 100644 index 0000000..5d163a9 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_S.gguf (version GGUF V3 (latest)) + +750 78.93333333% [75.8712%, 81.7006%] + + +llama_perf_context_print: load time = 717.95 ms +llama_perf_context_print: prompt eval time = 573377.50 ms / 129319 tokens ( 4.43 ms per token, 225.54 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 577152.30 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.mmlu new file mode 100644 index 0000000..cc9d103 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_S.gguf (version GGUF V3 (latest)) + +Final result: 43.7333 +/- 1.8126 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 729.81 ms +llama_perf_context_print: prompt eval time = 302270.16 ms / 68956 tokens ( 4.38 ms per token, 228.13 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 303540.55 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.ppx new file mode 100644 index 0000000..250bd28 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 19.765437 ± 0.153182 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 74.13% +Mean ln(PPL(Q)/PPL(base)) : 1.162523 ± 0.005278 +Mean PPL(Q)/PPL(base) : 3.197992 ± 0.016878 +Mean PPL(Q)-PPL(base) : 13.584860 ± 0.125811 + +====== KL divergence statistics ====== +Mean KLD: 1.295119 ± 0.004177 +Maximum KLD: 27.306818 +99.9% KLD: 13.228414 +99.0% KLD: 8.401047 +99.0% KLD: 8.401047 +Median KLD: 0.824191 +10.0% KLD: 0.081751 + 5.0% KLD: 0.029987 + 1.0% KLD: 0.004980 +Minimum KLD: 0.000157 + +====== Token probability statistics ====== +Mean Δp: -18.417 ± 0.085 % +Maximum Δp: 92.683% +99.9% Δp: 74.856% +99.0% Δp: 50.844% +95.0% Δp: 22.899% +90.0% Δp: 9.183% +75.0% Δp: 0.001% +Median Δp: -5.493% +25.0% Δp: -34.119% +10.0% Δp: -74.211% + 5.0% Δp: -91.714% + 1.0% Δp: -99.759% + 0.1% Δp: -99.964% +Minimum Δp: -99.995% +RMS Δp : 38.004 ± 0.087 % +Same top p: 61.136 ± 0.125 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.tqa new file mode 100644 index 0000000..5d8a8db --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_S.gguf (version GGUF V3 (latest)) + +Final result: 38.1333 +/- 1.7748 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 713.93 ms +llama_perf_context_print: prompt eval time = 230959.05 ms / 51053 tokens ( 4.52 ms per token, 221.05 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 232565.83 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.wng new file mode 100644 index 0000000..dfdfa99 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q3_k_s.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q3_K_S.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.8000 +/- 1.6260 + +llama_perf_context_print: load time = 732.76 ms +llama_perf_context_print: prompt eval time = 100522.70 ms / 22541 tokens ( 4.46 ms per token, 224.24 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 101025.93 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.arc new file mode 100644 index 0000000..4327f8d --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 68.0000 +/- 1.7045 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 5558.39 ms +llama_perf_context_print: prompt eval time = 161414.80 ms / 36666 tokens ( 4.40 ms per token, 227.15 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 162273.65 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.hsw new file mode 100644 index 0000000..c4d284a --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_M.gguf (version GGUF V3 (latest)) + +750 80.93333333% [77.9671%, 83.5843%] + + +llama_perf_context_print: load time = 911.43 ms +llama_perf_context_print: prompt eval time = 570839.24 ms / 129319 tokens ( 4.41 ms per token, 226.54 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 574520.13 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.mmlu new file mode 100644 index 0000000..4fe37ed --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 45.2000 +/- 1.8185 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 974.87 ms +llama_perf_context_print: prompt eval time = 300441.31 ms / 68956 tokens ( 4.36 ms per token, 229.52 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 301727.28 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.ppx new file mode 100644 index 0000000..505cafc --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 18.556910 ± 0.145472 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 74.92% +Mean ln(PPL(Q)/PPL(base)) : 1.099431 ± 0.005248 +Mean PPL(Q)/PPL(base) : 3.002456 ± 0.015757 +Mean PPL(Q)-PPL(base) : 12.376333 ± 0.117900 + +====== KL divergence statistics ====== +Mean KLD: 1.187728 ± 0.004237 +Maximum KLD: 27.586824 +99.9% KLD: 14.366529 +99.0% KLD: 8.795478 +99.0% KLD: 8.795478 +Median KLD: 0.711806 +10.0% KLD: 0.068614 + 5.0% KLD: 0.024978 + 1.0% KLD: 0.004366 +Minimum KLD: 0.000131 + +====== Token probability statistics ====== +Mean Δp: -16.803 ± 0.083 % +Maximum Δp: 94.144% +99.9% Δp: 72.562% +99.0% Δp: 49.033% +95.0% Δp: 23.292% +90.0% Δp: 10.354% +75.0% Δp: 0.021% +Median Δp: -4.202% +25.0% Δp: -30.211% +10.0% Δp: -70.964% + 5.0% Δp: -90.473% + 1.0% Δp: -99.772% + 0.1% Δp: -99.967% +Minimum Δp: -99.998% +RMS Δp : 36.521 ± 0.088 % +Same top p: 63.986 ± 0.123 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.tqa new file mode 100644 index 0000000..bff4d28 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final result: 36.6667 +/- 1.7608 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 953.61 ms +llama_perf_context_print: prompt eval time = 229781.03 ms / 51053 tokens ( 4.50 ms per token, 222.18 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 231385.66 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.wng new file mode 100644 index 0000000..af2615e --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_m.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_M.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.1333 +/- 1.6382 + +llama_perf_context_print: load time = 981.15 ms +llama_perf_context_print: prompt eval time = 100004.38 ms / 22541 tokens ( 4.44 ms per token, 225.40 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 100554.46 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.arc new file mode 100644 index 0000000..fec30eb --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_S.gguf (version GGUF V3 (latest)) + +Final result: 67.0667 +/- 1.7172 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 5272.61 ms +llama_perf_context_print: prompt eval time = 161319.58 ms / 36666 tokens ( 4.40 ms per token, 227.29 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 162146.27 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.hsw new file mode 100644 index 0000000..e17d440 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_S.gguf (version GGUF V3 (latest)) + +750 81.06666667% [78.1072%, 83.7095%] + + +llama_perf_context_print: load time = 868.43 ms +llama_perf_context_print: prompt eval time = 569329.51 ms / 129319 tokens ( 4.40 ms per token, 227.14 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 573001.51 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.mmlu new file mode 100644 index 0000000..fc2900a --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_S.gguf (version GGUF V3 (latest)) + +Final result: 45.2000 +/- 1.8185 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 885.80 ms +llama_perf_context_print: prompt eval time = 300092.31 ms / 68956 tokens ( 4.35 ms per token, 229.78 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 301434.44 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.ppx new file mode 100644 index 0000000..5b92ff1 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 18.663517 ± 0.146425 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 74.87% +Mean ln(PPL(Q)/PPL(base)) : 1.105159 ± 0.005257 +Mean PPL(Q)/PPL(base) : 3.019704 ± 0.015873 +Mean PPL(Q)-PPL(base) : 12.482940 ± 0.118853 + +====== KL divergence statistics ====== +Mean KLD: 1.192878 ± 0.004250 +Maximum KLD: 27.191839 +99.9% KLD: 14.356927 +99.0% KLD: 8.760485 +99.0% KLD: 8.760485 +Median KLD: 0.713994 +10.0% KLD: 0.069566 + 5.0% KLD: 0.025085 + 1.0% KLD: 0.004382 +Minimum KLD: 0.000130 + +====== Token probability statistics ====== +Mean Δp: -16.887 ± 0.083 % +Maximum Δp: 94.834% +99.9% Δp: 72.359% +99.0% Δp: 48.972% +95.0% Δp: 23.124% +90.0% Δp: 10.343% +75.0% Δp: 0.019% +Median Δp: -4.257% +25.0% Δp: -30.401% +10.0% Δp: -71.279% + 5.0% Δp: -90.560% + 1.0% Δp: -99.771% + 0.1% Δp: -99.966% +Minimum Δp: -99.998% +RMS Δp : 36.598 ± 0.088 % +Same top p: 63.979 ± 0.123 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.tqa new file mode 100644 index 0000000..a92437d --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_S.gguf (version GGUF V3 (latest)) + +Final result: 36.2667 +/- 1.7567 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 868.82 ms +llama_perf_context_print: prompt eval time = 229327.93 ms / 51053 tokens ( 4.49 ms per token, 222.62 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 230969.11 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.wng new file mode 100644 index 0000000..2879fa8 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q4_k_s.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q4_K_S.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 72.0000 +/- 1.6406 + +llama_perf_context_print: load time = 885.47 ms +llama_perf_context_print: prompt eval time = 99462.32 ms / 22541 tokens ( 4.41 ms per token, 226.63 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 99973.97 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.arc new file mode 100644 index 0000000..5e04d99 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_M.gguf (version GGUF V3 (latest)) + +Final result: 67.0667 +/- 1.7172 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 6034.32 ms +llama_perf_context_print: prompt eval time = 171299.05 ms / 36666 tokens ( 4.67 ms per token, 214.05 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 172122.97 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.hsw new file mode 100644 index 0000000..1b90b3b --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_M.gguf (version GGUF V3 (latest)) + +750 81.73333333% [78.8086%, 84.3346%] + + +llama_perf_context_print: load time = 1034.44 ms +llama_perf_context_print: prompt eval time = 604834.04 ms / 129319 tokens ( 4.68 ms per token, 213.81 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 608474.71 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.mmlu new file mode 100644 index 0000000..fac4dc4 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_M.gguf (version GGUF V3 (latest)) + +Final result: 44.5333 +/- 1.8160 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 1042.58 ms +llama_perf_context_print: prompt eval time = 317286.78 ms / 68956 tokens ( 4.60 ms per token, 217.33 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 318545.57 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.ppx new file mode 100644 index 0000000..0c80939 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 18.174846 ± 0.142320 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 75.14% +Mean ln(PPL(Q)/PPL(base)) : 1.078627 ± 0.005222 +Mean PPL(Q)/PPL(base) : 2.940639 ± 0.015356 +Mean PPL(Q)-PPL(base) : 11.994269 ± 0.114726 + +====== KL divergence statistics ====== +Mean KLD: 1.159685 ± 0.004238 +Maximum KLD: 28.100733 +99.9% KLD: 14.541190 +99.0% KLD: 8.790474 +99.0% KLD: 8.790474 +Median KLD: 0.682733 +10.0% KLD: 0.066346 + 5.0% KLD: 0.024022 + 1.0% KLD: 0.004240 +Minimum KLD: 0.000159 + +====== Token probability statistics ====== +Mean Δp: -16.552 ± 0.083 % +Maximum Δp: 94.307% +99.9% Δp: 72.111% +99.0% Δp: 47.813% +95.0% Δp: 23.079% +90.0% Δp: 10.497% +75.0% Δp: 0.030% +Median Δp: -4.029% +25.0% Δp: -29.474% +10.0% Δp: -70.364% + 5.0% Δp: -90.072% + 1.0% Δp: -99.761% + 0.1% Δp: -99.966% +Minimum Δp: -99.998% +RMS Δp : 36.214 ± 0.088 % +Same top p: 64.455 ± 0.123 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.tqa new file mode 100644 index 0000000..6d4a7ae --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_M.gguf (version GGUF V3 (latest)) + +Final result: 37.8667 +/- 1.7724 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 1018.74 ms +llama_perf_context_print: prompt eval time = 243028.85 ms / 51053 tokens ( 4.76 ms per token, 210.07 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 244621.86 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.wng new file mode 100644 index 0000000..833fddf --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_m.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_M.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 73.8667 +/- 1.6054 + +llama_perf_context_print: load time = 1080.02 ms +llama_perf_context_print: prompt eval time = 105347.75 ms / 22541 tokens ( 4.67 ms per token, 213.97 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 105836.33 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.arc new file mode 100644 index 0000000..dfc0a2a --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_S.gguf (version GGUF V3 (latest)) + +Final result: 67.3333 +/- 1.7137 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 5824.40 ms +llama_perf_context_print: prompt eval time = 172340.28 ms / 36666 tokens ( 4.70 ms per token, 212.75 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 173198.86 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.hsw new file mode 100644 index 0000000..8abfbeb --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_S.gguf (version GGUF V3 (latest)) + +750 81.46666667% [78.5279%, 84.0847%] + + +llama_perf_context_print: load time = 1049.43 ms +llama_perf_context_print: prompt eval time = 592449.23 ms / 129319 tokens ( 4.58 ms per token, 218.28 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 596100.86 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.mmlu new file mode 100644 index 0000000..fbb7b47 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_S.gguf (version GGUF V3 (latest)) + +Final result: 44.2667 +/- 1.8149 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 1030.41 ms +llama_perf_context_print: prompt eval time = 307801.57 ms / 68956 tokens ( 4.46 ms per token, 224.03 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 309115.30 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.ppx new file mode 100644 index 0000000..4568af6 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 18.199918 ± 0.142513 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 75.20% +Mean ln(PPL(Q)/PPL(base)) : 1.080005 ± 0.005216 +Mean PPL(Q)/PPL(base) : 2.944695 ± 0.015358 +Mean PPL(Q)-PPL(base) : 12.019340 ± 0.114880 + +====== KL divergence statistics ====== +Mean KLD: 1.160040 ± 0.004229 +Maximum KLD: 27.444889 +99.9% KLD: 14.604442 +99.0% KLD: 8.763094 +99.0% KLD: 8.763094 +Median KLD: 0.682655 +10.0% KLD: 0.066186 + 5.0% KLD: 0.023837 + 1.0% KLD: 0.004326 +Minimum KLD: 0.000148 + +====== Token probability statistics ====== +Mean Δp: -16.565 ± 0.083 % +Maximum Δp: 94.238% +99.9% Δp: 72.114% +99.0% Δp: 47.597% +95.0% Δp: 23.064% +90.0% Δp: 10.544% +75.0% Δp: 0.030% +Median Δp: -4.020% +25.0% Δp: -29.497% +10.0% Δp: -70.424% + 5.0% Δp: -90.053% + 1.0% Δp: -99.759% + 0.1% Δp: -99.965% +Minimum Δp: -99.998% +RMS Δp : 36.220 ± 0.088 % +Same top p: 64.431 ± 0.123 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.tqa new file mode 100644 index 0000000..f77b7b2 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_S.gguf (version GGUF V3 (latest)) + +Final result: 38.6667 +/- 1.7794 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 984.39 ms +llama_perf_context_print: prompt eval time = 235496.75 ms / 51053 tokens ( 4.61 ms per token, 216.79 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 237131.13 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.wng new file mode 100644 index 0000000..a6fb105 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q5_k_s.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q5_K_S.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 74.2667 +/- 1.5974 + +llama_perf_context_print: load time = 1051.46 ms +llama_perf_context_print: prompt eval time = 102348.25 ms / 22541 tokens ( 4.54 ms per token, 220.24 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 102869.23 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.arc new file mode 100644 index 0000000..0582682 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q6_K.gguf (version GGUF V3 (latest)) + +Final result: 67.4667 +/- 1.7119 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 7664.20 ms +llama_perf_context_print: prompt eval time = 175407.42 ms / 36666 tokens ( 4.78 ms per token, 209.03 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 176202.75 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.hsw new file mode 100644 index 0000000..83e76a0 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q6_K.gguf (version GGUF V3 (latest)) + +750 81.06666667% [78.1072%, 83.7095%] + + +llama_perf_context_print: load time = 1225.60 ms +llama_perf_context_print: prompt eval time = 622423.40 ms / 129319 tokens ( 4.81 ms per token, 207.77 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 626130.15 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.mmlu new file mode 100644 index 0000000..02e475b --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q6_K.gguf (version GGUF V3 (latest)) + +Final result: 44.5333 +/- 1.8160 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 1255.72 ms +llama_perf_context_print: prompt eval time = 327772.92 ms / 68956 tokens ( 4.75 ms per token, 210.38 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 329047.99 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.ppx new file mode 100644 index 0000000..e1e6e97 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 18.213825 ± 0.142965 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 75.05% +Mean ln(PPL(Q)/PPL(base)) : 1.080769 ± 0.005241 +Mean PPL(Q)/PPL(base) : 2.946946 ± 0.015446 +Mean PPL(Q)-PPL(base) : 12.033248 ± 0.115399 + +====== KL divergence statistics ====== +Mean KLD: 1.158026 ± 0.004262 +Maximum KLD: 26.265640 +99.9% KLD: 14.634221 +99.0% KLD: 8.878881 +99.0% KLD: 8.878881 +Median KLD: 0.679157 +10.0% KLD: 0.065587 + 5.0% KLD: 0.023885 + 1.0% KLD: 0.004187 +Minimum KLD: 0.000139 + +====== Token probability statistics ====== +Mean Δp: -16.509 ± 0.083 % +Maximum Δp: 94.239% +99.9% Δp: 72.141% +99.0% Δp: 48.282% +95.0% Δp: 23.325% +90.0% Δp: 10.605% +75.0% Δp: 0.032% +Median Δp: -4.000% +25.0% Δp: -29.434% +10.0% Δp: -70.325% + 5.0% Δp: -90.061% + 1.0% Δp: -99.771% + 0.1% Δp: -99.967% +Minimum Δp: -99.998% +RMS Δp : 36.219 ± 0.088 % +Same top p: 64.447 ± 0.123 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.tqa new file mode 100644 index 0000000..ac4b9b7 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q6_K.gguf (version GGUF V3 (latest)) + +Final result: 39.6000 +/- 1.7870 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 1251.53 ms +llama_perf_context_print: prompt eval time = 250368.11 ms / 51053 tokens ( 4.90 ms per token, 203.91 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 251979.66 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.wng new file mode 100644 index 0000000..5561faf --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q6_k.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q6_K.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 73.8667 +/- 1.6054 + +llama_perf_context_print: load time = 1250.90 ms +llama_perf_context_print: prompt eval time = 108113.49 ms / 22541 tokens ( 4.80 ms per token, 208.49 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 108625.99 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.arc b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.arc new file mode 100644 index 0000000..9f2d143 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.arc @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q8_0.gguf (version GGUF V3 (latest)) + +Final result: 68.1333 +/- 1.7026 +Random chance: 25.0083 +/- 1.5824 + + +llama_perf_context_print: load time = 10486.40 ms +llama_perf_context_print: prompt eval time = 161243.77 ms / 36666 tokens ( 4.40 ms per token, 227.39 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 162055.73 ms / 36667 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.hsw b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.hsw new file mode 100644 index 0000000..b0813b0 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.hsw @@ -0,0 +1,20 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q8_0.gguf (version GGUF V3 (latest)) + +750 81.33333333% [78.3876%, 83.9597%] + + +llama_perf_context_print: load time = 1550.41 ms +llama_perf_context_print: prompt eval time = 553902.33 ms / 129319 tokens ( 4.28 ms per token, 233.47 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 557360.27 ms / 129320 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.mmlu b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.mmlu new file mode 100644 index 0000000..357b62e --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.mmlu @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q8_0.gguf (version GGUF V3 (latest)) + +Final result: 44.9333 +/- 1.8176 +Random chance: 25.0000 +/- 1.5822 + + +llama_perf_context_print: load time = 1526.16 ms +llama_perf_context_print: prompt eval time = 290805.14 ms / 68956 tokens ( 4.22 ms per token, 237.12 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 292006.82 ms / 68957 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.ppx b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.ppx new file mode 100644 index 0000000..a7983dd --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.ppx @@ -0,0 +1,37 @@ +====== Perplexity statistics ====== +Mean PPL(Q) : 18.203515 ± 0.142826 +Mean PPL(base) : 6.180577 ± 0.041038 +Cor(ln(PPL(Q)), ln(PPL(base))): 75.02% +Mean ln(PPL(Q)/PPL(base)) : 1.080203 ± 0.005242 +Mean PPL(Q)/PPL(base) : 2.945277 ± 0.015439 +Mean PPL(Q)-PPL(base) : 12.022938 ± 0.115276 + +====== KL divergence statistics ====== +Mean KLD: 1.158351 ± 0.004265 +Maximum KLD: 27.082415 +99.9% KLD: 14.510898 +99.0% KLD: 8.873251 +99.0% KLD: 8.873251 +Median KLD: 0.678687 +10.0% KLD: 0.065954 + 5.0% KLD: 0.024006 + 1.0% KLD: 0.004161 +Minimum KLD: 0.000141 + +====== Token probability statistics ====== +Mean Δp: -16.515 ± 0.083 % +Maximum Δp: 94.585% +99.9% Δp: 72.074% +99.0% Δp: 48.296% +95.0% Δp: 23.169% +90.0% Δp: 10.662% +75.0% Δp: 0.031% +Median Δp: -4.006% +25.0% Δp: -29.412% +10.0% Δp: -70.362% + 5.0% Δp: -90.143% + 1.0% Δp: -99.767% + 0.1% Δp: -99.966% +Minimum Δp: -99.998% +RMS Δp : 36.227 ± 0.088 % +Same top p: 64.562 ± 0.123 % diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.tqa b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.tqa new file mode 100644 index 0000000..f858242 --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.tqa @@ -0,0 +1,21 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q8_0.gguf (version GGUF V3 (latest)) + +Final result: 38.2667 +/- 1.7759 +Random chance: 19.8992 +/- 1.4588 + + +llama_perf_context_print: load time = 1498.38 ms +llama_perf_context_print: prompt eval time = 227802.62 ms / 51053 tokens ( 4.46 ms per token, 224.11 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 229374.00 ms / 51054 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) diff --git a/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.wng b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.wng new file mode 100644 index 0000000..870cbad --- /dev/null +++ b/scores/Dolphin-Mistral-24B-Venice-Edition-q8_0.wng @@ -0,0 +1,19 @@ +build: 5770 (b25e9277) with Apple clang version 17.0.0 (clang-1700.0.13.3) for arm64-apple-darwin24.4.0 +llama_model_load_from_file_impl: using device Metal (Apple M4 Max) - 49151 MiB free +llama_model_loader: loaded meta data with 43 key-value pairs and 345 tensors from ./Dolphin-Mistral-24B-Venice-Edition-pruned-Q8_0.gguf (version GGUF V3 (latest)) + +Final Winogrande score(750 tasks): 74.4000 +/- 1.5947 + +llama_perf_context_print: load time = 1463.29 ms +llama_perf_context_print: prompt eval time = 100036.10 ms / 22541 tokens ( 4.44 ms per token, 225.33 tokens per second) +llama_perf_context_print: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second) +llama_perf_context_print: total time = 100546.29 ms / 22542 tokens +ggml_metal_free: deallocating +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0) +ggml_metal_mem_pool_free: freeing memory pool, num heaps = 0 (total = 0)