初始化项目,由ModelHub XC社区提供模型
Model: eaddario/granite-4.1-8b-GGUF Source: Original Platform
This commit is contained in:
38
.gitattributes
vendored
Normal file
38
.gitattributes
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
*.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
*.logits filter=lfs diff=lfs merge=lfs -text
|
||||
*.dat filter=lfs diff=lfs merge=lfs -text
|
||||
*.bpw_state filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
584
.gitignore
vendored
Normal file
584
.gitignore
vendored
Normal file
@@ -0,0 +1,584 @@
|
||||
# https://github.com/github/gitignore
|
||||
|
||||
# -------------
|
||||
# General Linux
|
||||
# -------------
|
||||
.directory
|
||||
.fuse_hidden*
|
||||
.nfs*
|
||||
.Trash-*
|
||||
*~
|
||||
|
||||
# -----------
|
||||
# General OSX
|
||||
# -----------
|
||||
._*
|
||||
.apdisk
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
.AppleDouble
|
||||
.com.apple.timemachine.donotpresent
|
||||
.DocumentRevisions-V100
|
||||
.DS_Store
|
||||
.fseventsd
|
||||
.LSOverride
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
|
||||
# ---------------
|
||||
# General Windows
|
||||
# ---------------
|
||||
[Dd]esktop.ini
|
||||
*.cab
|
||||
*.lnk
|
||||
*.msi
|
||||
*.msix
|
||||
*.msm
|
||||
*.msp
|
||||
*.stackdump
|
||||
$RECYCLE.BIN/
|
||||
ehthumbs_vista.db
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
Thumbs.db:encryptable
|
||||
|
||||
# -----------------
|
||||
# General JetBrains
|
||||
# -----------------
|
||||
.idea_modules/
|
||||
.idea/
|
||||
*.iml
|
||||
*.ipr
|
||||
*.iws
|
||||
|
||||
# ---------------
|
||||
# General VS Code
|
||||
# ---------------
|
||||
!.vscode/*.code-snippets
|
||||
!.vscode/extensions.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
.history/
|
||||
.vscode/
|
||||
*.vsix
|
||||
|
||||
# ---------------------
|
||||
# General Visual Studio
|
||||
# ---------------------
|
||||
__pycache__/
|
||||
_NCrunch_*
|
||||
_pkginfo.txt
|
||||
_Pvt_Extensions
|
||||
_ReSharper*/
|
||||
_TeamCity*
|
||||
_UpgradeReport_Files/
|
||||
!?*.[Cc]ache/
|
||||
!.vscode/extensions.json
|
||||
!**/[Pp]ackages/build/
|
||||
.*crunch*.local.xml
|
||||
.builds
|
||||
.cr/personal
|
||||
.fake/
|
||||
.ionide/
|
||||
.localhistory/
|
||||
.mfractor/
|
||||
.ntvs_analysis.dat
|
||||
.paket/paket.exe
|
||||
.sass-cache/
|
||||
.vs/
|
||||
.vscode/*
|
||||
.vshistory/
|
||||
[Aa][Rr][Mm]/
|
||||
[Aa][Rr][Mm]64/
|
||||
[Bb]in/
|
||||
[Bb]uild[Ll]og.*
|
||||
[Dd]ebug/
|
||||
[Dd]ebugPS/
|
||||
[Dd]ebugPublic/
|
||||
[Ee]xpress/
|
||||
[Ll]og/
|
||||
[Ll]ogs/
|
||||
[Oo]bj/
|
||||
[Rr]elease/
|
||||
[Rr]eleasePS/
|
||||
[Rr]eleases/
|
||||
[Tt]est[Rr]esult*/
|
||||
[Ww][Ii][Nn]32/
|
||||
*_h.h
|
||||
*_i.c
|
||||
*_p.c
|
||||
*_wpftmp.csproj
|
||||
*- [Bb]ackup ([0-9]).rdl
|
||||
*- [Bb]ackup ([0-9][0-9]).rdl
|
||||
*- [Bb]ackup.rdl
|
||||
*.[Cc]ache
|
||||
*.[Pp]ublish.xml
|
||||
*.[Rr]e[Ss]harper
|
||||
*.appx
|
||||
*.appxbundle
|
||||
*.appxupload
|
||||
*.aps
|
||||
*.azurePubxml
|
||||
*.bim_*.settings
|
||||
*.bim.layout
|
||||
*.binlog
|
||||
*.btm.cs
|
||||
*.btp.cs
|
||||
*.build.csdef
|
||||
*.cachefile
|
||||
*.code-workspace
|
||||
*.coverage
|
||||
*.coveragexml
|
||||
*.dbmdl
|
||||
*.dbproj.schemaview
|
||||
*.dotCover
|
||||
*.DotSettings.user
|
||||
*.dsp
|
||||
*.dsw
|
||||
*.e2e
|
||||
*.GhostDoc.xml
|
||||
*.gpState
|
||||
*.ilk
|
||||
*.iobj
|
||||
*.ipdb
|
||||
*.jfm
|
||||
*.jmconfig
|
||||
*.ldf
|
||||
*.mdf
|
||||
*.meta
|
||||
*.mm.*
|
||||
*.ncb
|
||||
*.ndf
|
||||
*.nuget.props
|
||||
*.nuget.targets
|
||||
*.nupkg
|
||||
*.nvuser
|
||||
*.obj
|
||||
*.odx.cs
|
||||
*.opendb
|
||||
*.opensdf
|
||||
*.opt
|
||||
*.pch
|
||||
*.pdb
|
||||
*.pfx
|
||||
*.pgc
|
||||
*.pgd
|
||||
*.pidb
|
||||
*.plg
|
||||
*.psess
|
||||
*.publishproj
|
||||
*.publishsettings
|
||||
*.pubxml
|
||||
*.pyc
|
||||
*.rdl.data
|
||||
*.rptproj.bak
|
||||
*.rptproj.rsuser
|
||||
*.rsp
|
||||
*.rsuser
|
||||
*.sap
|
||||
*.sbr
|
||||
*.scc
|
||||
*.sdf
|
||||
*.sln.docstates
|
||||
*.sln.iml
|
||||
*.snupkg
|
||||
*.suo
|
||||
*.svclog
|
||||
*.tlb
|
||||
*.tlh
|
||||
*.tli
|
||||
*.tlog
|
||||
*.tmp
|
||||
*.tmp_proj
|
||||
*.tss
|
||||
*.user
|
||||
*.userosscache
|
||||
*.userprefs
|
||||
*.vbp
|
||||
*.vbw
|
||||
*.VC.db
|
||||
*.VC.VC.opendb
|
||||
*.VisualState.xml
|
||||
*.vsp
|
||||
*.vspscc
|
||||
*.vspx
|
||||
*.vssscc
|
||||
*.xsd.cs
|
||||
**/[Pp]ackages/*
|
||||
**/*.DesktopClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/ModelManifest.xml
|
||||
**/*.HTMLClient/GeneratedArtifacts
|
||||
**/*.Server/GeneratedArtifacts
|
||||
**/*.Server/ModelManifest.xml
|
||||
~$*
|
||||
$tf/
|
||||
AppPackages/
|
||||
artifacts/
|
||||
ASALocalRun/
|
||||
AutoTest.Net/
|
||||
Backup*/
|
||||
BenchmarkDotNet.Artifacts/
|
||||
bld/
|
||||
BundleArtifacts/
|
||||
ClientBin/
|
||||
coverage*.info
|
||||
coverage*.json
|
||||
coverage*.xml
|
||||
csx/
|
||||
dlldata.c
|
||||
DocProject/buildhelp/
|
||||
DocProject/Help/*.hhc
|
||||
DocProject/Help/*.hhk
|
||||
DocProject/Help/*.hhp
|
||||
DocProject/Help/*.HxC
|
||||
DocProject/Help/*.HxT
|
||||
DocProject/Help/html
|
||||
DocProject/Help/Html2
|
||||
ecf/
|
||||
FakesAssemblies/
|
||||
FodyWeavers.xsd
|
||||
Generated_Code/
|
||||
healthchecksdb
|
||||
ipch/
|
||||
MigrationBackup/
|
||||
mono_crash.*
|
||||
nCrunchTemp_*
|
||||
node_modules/
|
||||
nunit-*.xml
|
||||
OpenCover/
|
||||
orleans.codegen.cs
|
||||
Package.StoreAssociation.xml
|
||||
paket-files/
|
||||
project.fragment.lock.json
|
||||
project.lock.json
|
||||
publish/
|
||||
PublishScripts/
|
||||
rcf/
|
||||
ScaffoldingReadMe.txt
|
||||
ServiceFabricBackup/
|
||||
StyleCopReport.xml
|
||||
TestResult.xml
|
||||
UpgradeLog*.htm
|
||||
UpgradeLog*.XML
|
||||
x64/
|
||||
x86/
|
||||
|
||||
# ----------------------
|
||||
# General Archived Files
|
||||
# ----------------------
|
||||
*.7z
|
||||
*.bz2
|
||||
*.bzip
|
||||
*.bzip2
|
||||
*.deb
|
||||
*.dmg
|
||||
*.egg
|
||||
*.gem
|
||||
*.gz
|
||||
*.gzip
|
||||
*.iso
|
||||
*.jar
|
||||
*.lzma
|
||||
*.rar
|
||||
*.rpm
|
||||
*.tar
|
||||
*.tgz
|
||||
*.txz
|
||||
*.tzst
|
||||
*.xar
|
||||
*.xpi
|
||||
*.xz
|
||||
*.zip
|
||||
*.zst
|
||||
|
||||
# -----
|
||||
# C/C++
|
||||
# -----
|
||||
.tmp_versions/
|
||||
*.a
|
||||
*.app
|
||||
*.cmd
|
||||
*.d
|
||||
*.dll
|
||||
*.dSYM/
|
||||
*.dylib
|
||||
*.elf
|
||||
*.exe
|
||||
*.exp
|
||||
*.gch
|
||||
*.hex
|
||||
*.i*86
|
||||
*.idb
|
||||
*.ko
|
||||
*.la
|
||||
*.lai
|
||||
*.lib
|
||||
*.lo
|
||||
*.map
|
||||
*.mod*
|
||||
*.o
|
||||
*.out
|
||||
*.slo
|
||||
*.so
|
||||
*.so.*
|
||||
*.su
|
||||
*.x86_64
|
||||
dkms.conf
|
||||
Mkfile.old
|
||||
Module.symvers
|
||||
modules.order
|
||||
|
||||
# ----
|
||||
# CUDA
|
||||
# ----
|
||||
*.cubin
|
||||
*.fatbin
|
||||
*.gpu
|
||||
*.i
|
||||
*.ii
|
||||
*.ptx
|
||||
|
||||
# --
|
||||
# Go
|
||||
# --
|
||||
.env
|
||||
*.exe~
|
||||
*.test
|
||||
go.work
|
||||
go.work.sum
|
||||
|
||||
# ----
|
||||
# Java
|
||||
# ----
|
||||
*.class
|
||||
*.ctxt
|
||||
*.ear
|
||||
*.hprof
|
||||
*.nar
|
||||
*.tar.gz
|
||||
*.war
|
||||
hs_err_pid*
|
||||
replay_pid*
|
||||
|
||||
# -----
|
||||
# Julia
|
||||
# -----
|
||||
*.jl.*.cov
|
||||
*.jl.cov
|
||||
*.jl.mem
|
||||
deps/build.log
|
||||
deps/deps.jl
|
||||
deps/downloads/
|
||||
deps/src/
|
||||
deps/usr/
|
||||
docs/build/
|
||||
docs/site/
|
||||
Manifest.toml
|
||||
|
||||
# ------------------------------
|
||||
# JavaScript / Node / TypeScript
|
||||
# ------------------------------
|
||||
.cache
|
||||
.cache/
|
||||
.docusaurus
|
||||
.dynamodb/
|
||||
.env.development.local
|
||||
.env.local
|
||||
.env.production.local
|
||||
.env.test.local
|
||||
.eslintcache
|
||||
.fusebox/
|
||||
.grunt
|
||||
.lock-wscript
|
||||
.next
|
||||
.node_repl_history
|
||||
.npm
|
||||
.nuxt
|
||||
.nyc_output
|
||||
.parcel-cache
|
||||
.pnp.*
|
||||
.pnpm-debug.log*
|
||||
.rpt2_cache/
|
||||
.rts2_cache_cjs/
|
||||
.rts2_cache_es/
|
||||
.rts2_cache_umd/
|
||||
.serverless/
|
||||
.stylelintcache
|
||||
.temp
|
||||
.tern-port
|
||||
.vscode-test
|
||||
.vuepress/dist
|
||||
.yarn-integrity
|
||||
.yarn/build-state.yml
|
||||
.yarn/cache
|
||||
.yarn/install-state.gz
|
||||
.yarn/unplugged
|
||||
*.lcov
|
||||
*.pid
|
||||
*.pid.lock
|
||||
*.seed
|
||||
*.tsbuildinfo
|
||||
bower_components
|
||||
dist
|
||||
jspm_packages/
|
||||
lerna-debug.log*
|
||||
lib-cov
|
||||
logs
|
||||
npm-debug.log*
|
||||
out
|
||||
pids
|
||||
report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
|
||||
web_modules/
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
|
||||
# ------
|
||||
# Python
|
||||
# ------
|
||||
__pypackages__/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.dmypy.json
|
||||
.eggs/
|
||||
.hypothesis/
|
||||
.installed.cfg
|
||||
.ipynb_checkpoints
|
||||
.mypy_cache/
|
||||
.nox/
|
||||
.pdm-build/
|
||||
.pdm-python
|
||||
.pdm.toml
|
||||
.pybuilder/
|
||||
.pyre/
|
||||
.pytest_cache/
|
||||
.Python
|
||||
.python-version
|
||||
.pytype/
|
||||
.ropeproject
|
||||
.scrapy
|
||||
.spyderproject
|
||||
.spyproject
|
||||
.tox/
|
||||
.venv
|
||||
.webassets-cache
|
||||
*.cover
|
||||
*.egg-info/
|
||||
*.manifest
|
||||
*.mo
|
||||
*.pot
|
||||
*.py,cover
|
||||
*.py[cod]
|
||||
*.sage.py
|
||||
*.spec
|
||||
*$py.class
|
||||
/site
|
||||
build/
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
cover/
|
||||
coverage.xml
|
||||
cython_debug/
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
develop-eggs/
|
||||
dist/
|
||||
dmypy.json
|
||||
docs/_build/
|
||||
downloads/
|
||||
eggs/
|
||||
env.bak/
|
||||
env/
|
||||
htmlcov/
|
||||
instance/
|
||||
ipython_config.py
|
||||
lib/
|
||||
lib64/
|
||||
local_settings.py
|
||||
MANIFEST
|
||||
nosetests.xml
|
||||
parts/
|
||||
pip-delete-this-directory.txt
|
||||
pip-log.txt
|
||||
profile_default/
|
||||
sdist/
|
||||
share/python-wheels/
|
||||
target/
|
||||
var/
|
||||
venv.bak/
|
||||
venv/
|
||||
wheels/
|
||||
|
||||
# ----
|
||||
# Rust
|
||||
# ----
|
||||
**/*.rs.bk
|
||||
Cargo.lock
|
||||
debug/
|
||||
|
||||
# -----
|
||||
# Scala
|
||||
# -----
|
||||
|
||||
# -----
|
||||
# CMake
|
||||
# -----
|
||||
_deps
|
||||
cmake_install.cmake
|
||||
CMakeCache.txt
|
||||
CMakeFiles
|
||||
CMakeLists.txt.user
|
||||
CMakeScripts
|
||||
CMakeUserPresets.json
|
||||
compile_commands.json
|
||||
CTestTestfile.cmake
|
||||
install_manifest.txt
|
||||
Makefile
|
||||
Testing
|
||||
|
||||
# ------
|
||||
# Gradle
|
||||
# ------
|
||||
!gradle-wrapper.jar
|
||||
!gradle-wrapper.properties
|
||||
!src/**/build/
|
||||
.classpath
|
||||
.gradle
|
||||
.gradletasknamecache
|
||||
.project
|
||||
**/build/
|
||||
gradle-app.setting
|
||||
|
||||
# -----
|
||||
# Maven
|
||||
# -----
|
||||
.mvn/
|
||||
buildNumber.properties
|
||||
dependency-reduced-pom.xml
|
||||
pom.xml.next
|
||||
pom.xml.releaseBackup
|
||||
pom.xml.tag
|
||||
pom.xml.versionsBackup
|
||||
release.properties
|
||||
|
||||
# ---------
|
||||
# Terraform
|
||||
# ---------
|
||||
.terraform.tfstate.lock.info
|
||||
.terraform/
|
||||
.terraformrc
|
||||
*_override.tf
|
||||
*_override.tf.json
|
||||
*.tfstate
|
||||
*.tfstate.*
|
||||
*.tfvars
|
||||
*.tfvars.json
|
||||
crash.*.log
|
||||
crash.log
|
||||
override.tf
|
||||
override.tf.json
|
||||
terraform.rc
|
||||
3
Granite_4.1_8b-6f6fd2708e8b1ded.bpw_state
Normal file
3
Granite_4.1_8b-6f6fd2708e8b1ded.bpw_state
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6ab793fef9b02f0b52d04ae0a0bc046d75406ba58d9291b3439fbc5f1f0519fe
|
||||
size 99424
|
||||
238
README.md
Normal file
238
README.md
Normal file
@@ -0,0 +1,238 @@
|
||||
---
|
||||
base_model:
|
||||
- ibm-granite/granite-4.1-8b
|
||||
datasets:
|
||||
- eaddario/imatrix-calibration
|
||||
language:
|
||||
- en
|
||||
license:
|
||||
- apache-2.0
|
||||
pipeline_tag: text-generation
|
||||
tags:
|
||||
- gguf
|
||||
- quant
|
||||
- target_bpw
|
||||
- experimental
|
||||
---
|
||||
|
||||
# Experimental global target bits‑per‑weight quantization of [ibm-granite/granite-4.1-8b](https://huggingface.co/ibm-granite/granite-4.1-8b)
|
||||
Using **non-standard** (forked) [LLaMA C++][llm] release [b9358][llm-rel] for quantization.
|
||||
|
||||
Original model: [ibm-granite/granite-4.1-8b][mdl]
|
||||
|
||||
From the original model creators:
|
||||
> [](https://mot.isitopen.ai/model/1160)
|
||||
>
|
||||
> # Granite-4.1-8B
|
||||
>
|
||||
> **Model Summary:**
|
||||
> Granite-4.1-8B is a 8B parameter long-context instruct model finetuned from *Granite-4.1-8B-Base* using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. Granite 4.1 models have gone through an improved post-training pipeline, including supervised finetuning and reinforcement learning alignment, resulting in enhanced tool calling, instruction following, and chat capabilities.
|
||||
>
|
||||
> - **Developers:** Granite Team, IBM
|
||||
> - **HF Collection:** [Granite 4.1 Language Models HF Collection](https://huggingface.co/collections/ibm-granite/granite-41-language-models)
|
||||
> - **Technical Blog:** [Granite-4.1 Blog](https://huggingface.co/blog/ibm-granite/granite-4-1)
|
||||
> - **GitHub Repository:** [ibm-granite/granite-4.1-language-models](https://github.com/ibm-granite/granite-4.1-language-models)
|
||||
> - **Website**: [Granite Docs](https://www.ibm.com/granite/docs/)
|
||||
> - **Release Date**: April 29th, 2026
|
||||
> - **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0)
|
||||
>
|
||||
> **Supported Languages:**
|
||||
> English, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. Users may finetune Granite 4.1 models for languages beyond these languages.
|
||||
>
|
||||
> **Intended use:**
|
||||
> The model is designed to follow general instructions and can serve as the foundation for AI assistants across diverse domains, including business applications, as well as for LLM agents equipped with tool-use capabilities.
|
||||
|
||||
---
|
||||
|
||||
# ⚠️ PLEASE READ THIS BEFORE USING THESE EXPERIMENTAL VERSIONS! ⚠️
|
||||
An area of personal interest is finding ways to optimize the inference performance of LLMs when deployed in resource-constrained environments like commodity hardware, desktops, laptops, mobiles, edge devices, etc. There are many approaches to accomplish this, including architecture simplification and knowledge distillation, but my focus has been primarily on quantization and pruning.
|
||||
|
||||
The method to produce these experimental versions involves using a custom version of [`llama-imatrix`][imx] to generate an imatrix that includes tensor statistics, and a custom version of [`llama-quantize`][qtz], which computes a per-tensor quantization error, to automatically select the lowest error quantization recipe that achieves a global target bits‑per‑weight (bpw). More details on the implementation and test results are available [here][bpw].
|
||||
|
||||
There are two pull requests ([#14891][imtx-pr] & [#15550][qtz-pr]) to merge these changes back into the core llama.cpp project. This may or may not ever happen so, until then, the modified versions will be available on [GitHub][gh].
|
||||
|
||||
For testing and comparison, I use models produced by [Bartowski][btk] (see credits below) and [Unsloth][ust] ([Daniel and Michael Han][ust-ai] do some really interesting stuff!) but when they don't provide versions of the required model, tests and comparisons are against standard quantization obtained by simply running `llama-quantize` with no further optimizations.
|
||||
|
||||
All experimental versions were generated using an appropriate imatrix created from datasets available at [eaddario/imatrix-calibration][ical]. In `llama.cpp`, an imatrix is a calibration file derived from running representative text through the model and collecting activation statistics. It is used to weight quantization error so that error in more “important” directions (as estimated from activations) is penalized more heavily.
|
||||
|
||||
The process to generate these models is roughly as follows:
|
||||
1. Convert the original model's [safetensors][sfts] to [GGUF][ggf] F16
|
||||
2. Estimate the [Perplexity][ppl] score for the F16 model (baseline) using the [wikitext-2-raw-v1][wki-dat] dataset, and save the [logits][lgt]
|
||||
3. Generate an [imatrix][imx-dat] from the most appropriate [calibration dataset][ical]
|
||||
4. Quantize the baseline model targeting a bpw average (e.g. `llama-quantize --target-bpw 4.5678 --state-file --imatrix imatrix.gguf baseline-model-F16.gguf 12`)
|
||||
5. Calculate Perplexity, KL Divergence, ARC (Easy+Challenge), GPQA-Diamond, HellaSwag, MMLU-Redux, Truthful QA and WinoGrande scores for each quantized model
|
||||
6. Keep the version with the best 𝜌PPL and μKLD scores
|
||||
7. Repeat until all desired quants are created
|
||||
|
||||
### Misconceptions about BF16 to F16 Conversion
|
||||
A common concern when converting BFloat16 ([BF16][bf16]) models to Float16 (F16) is the potential for accuracy loss. Specifically:
|
||||
- Weight Clipping (Overflow): Clipping, or overflow, is often feared but only occurs if a model's weights exceed the F16 range of ±65,504 (the largest finite F16 value). This is a relatively rare issue in practice.
|
||||
- Subnormal Zeroing (Underflow): A more frequent occurrence is underflow, where weights smaller than approximately 5.96x10⁻⁸ are converted to zero.
|
||||
|
||||
Crucially, when the F16 model is subsequently used for quantization, the resulting degradation in metrics like Perplexity ([PPL][ppl]) or Kullback–Leibler Divergence ([KLD][kld]) is minimal. Any variations are typically restricted to the hundredths or thousandths decimal places compared to the BF16 model.
|
||||
|
||||
However, considering that weight clipping presents a more substantial risk to model integrity, every BF16 base model undergoes validation prior to the conversion process. Consequently, no models hosted in this repository exhibit performance degradation due to overflow clipping.
|
||||
|
||||
While BF16 offers precision benefits, performance remains a key factor.
|
||||
- Conversion Speed: Tests, such as timing `convert_hf_to_gguf.py`, show a notable performance difference, with conversion to BF16 being 15–30% slower than to F16.
|
||||
- Inference Speed: A less pronounced but still present difference (3–6%) is observed during inference. Although native BF16 support has been introduced by many chip manufacturers, the slower performance **may** stem from the entire software and hardware stack (firmware, libraries, etc.) not being fully optimized yet.
|
||||
|
||||
The choice to prioritize F16 over BF16 is driven by a focus on maximizing performance in specific deployment environments. My primary objective is not large-scale quantization production, a domain in which others like [Bartowski][btk] and [Unsloth][ust] excel, but rather optimizing inference performance for resource-constrained environments. Since BF16 support is not yet widespread in areas like mobile, edge, and embedded devices, using F16 ensures broader compatibility and easier optimization for these use cases.
|
||||
|
||||
# Advantages and disadvantages of the global target bits‑per‑weight quantization process
|
||||
### Advantages
|
||||
1. **Target arbitrary size models**
|
||||
- When specifying `--target-bpw 4.5678` for instance, the algorithm will produce a model (nearly) exactly of that size, which is very useful for maximizing VRAM usage. In a system with 24GB VRAM and a 70B model, standard quants might produce a 16.8GB file (too small, quality left on the table) or a 24.1GB file (won't fit). This approach can generate a 23.85GB file to utilize the hardware fully.
|
||||
|
||||
2. **Data-driven mixed precision often can improve quality at fixed size**
|
||||
- Instead of using hardcoded heuristics (e.g. make `attn_v` Q5_K for a 70B model), that may be sub‑optimal for a given architecture or size, the quantization mix is determined by the actual error sensitivity of the specific model's weights. This, in practice, often yields a better quality/size trade-off, especially in aggressive quantization scenarios (1.5 to 3.5 bpw), or for unusual architectures.
|
||||
|
||||
- **Please note**: `llama.cpp`’s heuristics have been tuned across many models and are highly optimized; although the target bpw method often produces better quality (in >75% of cases, based on tests with 130 models from 11 different families), it can also lose in surprising cases.
|
||||
|
||||
3. **Allows better like-for-like comparisons between models and families**
|
||||
- Standard `llama.cpp` quantization uses hardcoded rules like: *"use Q4_K_M, except bump some tensors up/down, except fall back if incompatible, except keep some tensors unquantized..."* and for that reason, two different models quantized with the same Q4_K_M type can end up with very different bpw (e.g. 4.75 and 4.30).
|
||||
|
||||
- All things being equal, the performance of a model is usually proportional to its overall bpw size; models with a higher bpw tend to perform better than lower bpw models. In the example above, since the 4.75 bpw model has simply been given more bits than the 4.30 bpw model, it will typically perform better (lower perplexity, better eval scores, etc.) even if the underlying quantization method is identical. That makes comparing the performance not a controlled experiment, because the comparison is between models with different effective compression ratios.
|
||||
|
||||
- `--target-bpw` tries to address that by making the experiment more controlled: each model gets quantized to land on (approximately) the same global byte budget, so that the models' performance differences are more attributable to architecture/training differences, quantization error behaviour at the same compression ratio, optimizer’s allocation decisions, etc.
|
||||
|
||||
### Disadvantages
|
||||
1. **Quantization process is significantly slower than standard**
|
||||
- This approach can take 5x-10x longer as it quantizes a sample of most tensors into 15 different formats, dequantizes them back to floats, computes error diffs, and selects the best size/error option that fits the global bpw budget.
|
||||
|
||||
- However, the `--state-file` option will save/use the above-mentioned computations so that future quantizations, for the same model, can be generated at normal speed. It also allows the computation process to be interrupted and resumed at a later time.
|
||||
|
||||
2. **The optimization target is only a proxy for the model's performance quality**
|
||||
- The process minimizes a per-tensor estimated error computed from sampled rows, not actual perplexity or divergence of output distributions (a future version may address this). Since errors interact nonlinearly across layers, there are no guarantees it will select the best possible quantization recipe subject to the bpw size constraint.
|
||||
|
||||
3. **An imatrix with activations data is required for best results**
|
||||
- Activation data is required to compute the bias factor (i.e. the systematic error projected onto activation directions). If the imatrix file does not contain activation data, the `--target-bpw` option will refuse to run.
|
||||
|
||||
---
|
||||
|
||||
# Models
|
||||
### Bits per weight, size, perplexity and KL Divergence scores
|
||||
| Model | BPW | Size (GB) | μPPL | 𝜌PPL | μKLD | Same Top-P |
|
||||
| ------------------------------------------------- | ------: | --------: | ------------------: | -----: | -----------------: | ------------: |
|
||||
| [granite-4.1-8b-F16](./granite-4.1-8b-F16.gguf) | 16.0006 | 17.6 | 8.691178 ±0.065443 | 100% | N/A | N/A |
|
||||
| [granite-4.1-8b-Q1_L](./granite-4.1-8b-Q1_L.gguf) | 1.7500 | 1.93 | 87.318832 ±0.781580 | 57.61% | 2.889523 ±0.005948 | 34.309 ±0.125 |
|
||||
| [granite-4.1-8b-Q2_K](./granite-4.1-8b-Q2_K.gguf) | 2.5000 | 2.75 | 12.534216 ±0.095606 | 86.12% | 0.644965 ±0.002755 | 67.231 ±0.124 |
|
||||
| [granite-4.1-8b-Q3_K](./granite-4.1-8b-Q3_K.gguf) | 3.5000 | 3.85 | 9.381594 ±0.070128 | 96.18% | 0.173887 ±0.001079 | 82.732 ±0.100 |
|
||||
| [granite-4.1-8b-Q4_K](./granite-4.1-8b-Q4_K.gguf) | 4.4999 | 4.95 | 8.867438 ±0.067303 | 98.88% | 0.047917 ±0.000392 | 90.937 ±0.076 |
|
||||
| [granite-4.1-8b-Q5_K](./granite-4.1-8b-Q5_K.gguf) | 5.4999 | 6.05 | 8.766150 ±0.066421 | 99.48% | 0.018940 ±0.000165 | 94.120 ±0.062 |
|
||||
| [granite-4.1-8b-Q6_K](./granite-4.1-8b-Q6_K.gguf) | 6.4998 | 7.15 | 8.755199 ±0.066400 | 99.74% | 0.007326 ±0.000066 | 96.165 ±0.051 |
|
||||
| [granite-4.1-8b-Q7_K](./granite-4.1-8b-Q7_K.gguf) | 7.4998 | 8.25 | 8.751241 ±0.066500 | 99.82% | 0.003568 ±0.000040 | 97.235 ±0.043 |
|
||||
| [granite-4.1-8b-Q8_0](./granite-4.1-8b-Q8_0.gguf) | 8.4988 | 9.34 | 8.749119 ±0.066517 | 99.85% | 0.002052 ±0.000024 | 97.749 ±0.039 |
|
||||
|
||||
### ARC, GPQA-Diamond, HellaSwag, MMLU-Redux, Truthful QA, and WinoGrande scores
|
||||
Scores generated using [llama-perplexity][ppl] with 750 tasks per test, and a context size of 1024 tokens.
|
||||
|
||||
For the test data used in the generation of these scores, follow the appropriate links: [ARC Challenge, Truthful QA][tst-dat], [GPQA-Diamond][gpqa-dat], [HellaSwag][hsw-tst], [MMLU-Redux][mrdx], [WinoGrande][wng-tst]
|
||||
|
||||
| Model | ARC Challenge | GPQA-Diamond | HellaSwag | MMLU-Redux | Truthful QA | WinoGrande | Avg Score |
|
||||
| ------------------------------------------------- | --------------: | --------------: | --------: | --------------: | --------------: | --------------: | --------: |
|
||||
| [granite-4.1-8b-Q1_L](./granite-4.1-8b-Q1_L.gguf) | 36.5333 ±1.7594 | 19.1919 ±2.8058 | 36.00 | 27.2000 ±1.6260 | 28.9333 ±1.6569 | 52.5333 ±1.8246 | 33.40 |
|
||||
| [granite-4.1-8b-Q2_K](./granite-4.1-8b-Q2_K.gguf) | 60.4000 ±1.7870 | 29.7980 ±3.2586 | 70.00 | 59.2000 ±1.7958 | 33.4667 ±1.7242 | 65.2000 ±1.7405 | 53.01 |
|
||||
| [granite-4.1-8b-Q3_K](./granite-4.1-8b-Q3_K.gguf) | 62.0000 ±1.7736 | 21.7172 ±2.9377 | 79.33 | 69.2000 ±1.6869 | 39.6000 ±1.7870 | 71.7333 ±1.6453 | 57.26 |
|
||||
| [granite-4.1-8b-Q4_K](./granite-4.1-8b-Q4_K.gguf) | 66.9333 ±1.7190 | 23.2323 ±3.0089 | 79.73 | 71.4667 ±1.6500 | 38.9333 ±1.7816 | 73.4667 ±1.6132 | 58.96 |
|
||||
| [granite-4.1-8b-Q5_K](./granite-4.1-8b-Q5_K.gguf) | 66.4000 ±1.7259 | 22.7273 ±2.9858 | 79.87 | 72.1333 ±1.6382 | 38.5333 ±1.7783 | 73.4667 ±1.6132 | 58.86 |
|
||||
| [granite-4.1-8b-Q6_K](./granite-4.1-8b-Q6_K.gguf) | 67.0667 ±1.7172 | 24.7475 ±3.0746 | 80.13 | 72.6667 ±1.6284 | 38.2667 ±1.7759 | 73.7333 ±1.6080 | 59.44 |
|
||||
| [granite-4.1-8b-Q7_K](./granite-4.1-8b-Q7_K.gguf) | 66.4000 ±1.7259 | 26.7677 ±3.1544 | 80.27 | 72.1333 ±1.6382 | 38.5333 ±1.7783 | 73.6000 ±1.6106 | 59.62 |
|
||||
| [granite-4.1-8b-Q8_0](./granite-4.1-8b-Q8_0.gguf) | 66.8000 ±1.7207 | 26.7677 ±3.1544 | 80.53 | 72.4000 ±1.6334 | 38.4000 ±1.7771 | 73.2000 ±1.6184 | 59.68 |
|
||||
|
||||
### Tokens per second benchmarks
|
||||
Scores generated using [llama-bench][bch]. Standard (`llama-quantize` with no optimization) Q4_K_M quantization included for comparison.
|
||||
|
||||
| model | size | params | backend | threads | test | t/s |
|
||||
| ------------------------------------------------- | -------: | -----: | -------- | ------: | ------------: | ------------: |
|
||||
| [granite-4.1-8b-Q1_L](./granite-4.1-8b-Q1_L.gguf) | 1.79 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 783.11 ±0.52 |
|
||||
| [granite-4.1-8b-Q1_L](./granite-4.1-8b-Q1_L.gguf) | 1.79 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 68.68 ±0.17 |
|
||||
| [granite-4.1-8b-Q1_L](./granite-4.1-8b-Q1_L.gguf) | 1.79 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 108.35 ±1.28 |
|
||||
| [granite-4.1-8b-Q2_K](./granite-4.1-8b-Q2_K.gguf) | 2.56 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 728.97 ±10.22 |
|
||||
| [granite-4.1-8b-Q2_K](./granite-4.1-8b-Q2_K.gguf) | 2.56 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 68.76 ±0.21 |
|
||||
| [granite-4.1-8b-Q2_K](./granite-4.1-8b-Q2_K.gguf) | 2.56 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 108.98 ±0.24 |
|
||||
| [granite-4.1-8b-Q3_K](./granite-4.1-8b-Q3_K.gguf) | 3.58 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 733.45 ±9.51 |
|
||||
| [granite-4.1-8b-Q3_K](./granite-4.1-8b-Q3_K.gguf) | 3.58 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 63.63 ±1.20 |
|
||||
| [granite-4.1-8b-Q3_K](./granite-4.1-8b-Q3_K.gguf) | 3.58 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 94.51 ±1.15 |
|
||||
| [granite-4.1-8b-Q4_K](./granite-4.1-8b-Q4_K.gguf) | 4.61 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 771.63 ±0.97 |
|
||||
| [granite-4.1-8b-Q4_K](./granite-4.1-8b-Q4_K.gguf) | 4.61 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 66.33 ±1.24 |
|
||||
| [granite-4.1-8b-Q4_K](./granite-4.1-8b-Q4_K.gguf) | 4.61 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 105.98 ±4.76 |
|
||||
| [granite-4.1-8b-Q5_K](./granite-4.1-8b-Q5_K.gguf) | 5.63 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 673.26 ±34.19 |
|
||||
| [granite-4.1-8b-Q5_K](./granite-4.1-8b-Q5_K.gguf) | 5.63 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 51.29 ±3.09 |
|
||||
| [granite-4.1-8b-Q5_K](./granite-4.1-8b-Q5_K.gguf) | 5.63 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 83.45 ±2.31 |
|
||||
| [granite-4.1-8b-Q6_K](./granite-4.1-8b-Q6_K.gguf) | 6.65 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 703.41 ±23.92 |
|
||||
| [granite-4.1-8b-Q6_K](./granite-4.1-8b-Q6_K.gguf) | 6.65 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 52.12 ±1.38 |
|
||||
| [granite-4.1-8b-Q6_K](./granite-4.1-8b-Q6_K.gguf) | 6.65 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 87.04 ±0.22 |
|
||||
| [granite-4.1-8b-Q7_K](./granite-4.1-8b-Q7_K.gguf) | 7.68 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 614.53 ±0.48 |
|
||||
| [granite-4.1-8b-Q7_K](./granite-4.1-8b-Q7_K.gguf) | 7.68 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 49.47 ±0.59 |
|
||||
| [granite-4.1-8b-Q7_K](./granite-4.1-8b-Q7_K.gguf) | 7.68 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 83.45 ±0.24 |
|
||||
| [granite-4.1-8b-Q8_0](./granite-4.1-8b-Q8_0.gguf) | 8.70 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 800.32 ±0.73 |
|
||||
| [granite-4.1-8b-Q8_0](./granite-4.1-8b-Q8_0.gguf) | 8.70 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 46.66 ±0.04 |
|
||||
| [granite-4.1-8b-Q8_0](./granite-4.1-8b-Q8_0.gguf) | 8.70 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 77.87 ±0.30 |
|
||||
|
||||
# Metrics used
|
||||
**[Perplexity][ppx]:** one of the key metrics used in NLP evaluation. It measures the quality of a language model by evaluating how well it predicts the next token given a particular sequence of words. A PPL of **1** indicates an exact match between the predicted and actual tokens, whereas values greater than one indicate the degree of "surprise" with which the generated token differs from the expected one.
|
||||
|
||||
**[Kullback–Leibler (KL) Divergence][kld]:** a statistical measure of how much a probability distribution differs from another. When quantizing models (or altering the original tensors in any way for that matter), the closer we can keep the weights' probability distribution to that of the original model the better; thus, the closer to **0** the better.
|
||||
|
||||
**[AI2 Reasoning Challenge (ARC)][arc]:** a benchmark to evaluate the ability of AI models to answer complex science questions that require logical reasoning beyond pattern matching.
|
||||
|
||||
**[GPQA-Diamond][gpqa]:** the 198-question "Diamond" subset of GPQA, a challenging dataset of 448 multiple-choice questions written by domain experts in biology, physics, and chemistry.
|
||||
|
||||
**[HellaSwag][hsw]:** the Harder Endings, Longer contexts, and Low-shot Activities for Situations With Adversarial Generations (bit of a mouthful!) is a benchmark designed to test commonsense natural language inference. It requires the model to predict the most likely ending of a sentence.
|
||||
|
||||
**[MMLU][mmlu]:** the Massive Multitask Language Understanding benchmark evaluates LLMs’ general knowledge and problem-solving abilities across 57 subjects, including elementary mathematics, US history, computer science, and law.
|
||||
|
||||
**[TruthfulQA][tqa]:** evaluates how well LLMs generate truthful responses to questions. It identifies whether AI models can avoid generating false or misleading information, particularly in areas where human knowledge is prone to misconceptions.
|
||||
|
||||
**[Winogrande][wng]:** based on the [Winograd Schema Challenge][wng-chl], this is a natural language understanding task that requires models to resolve ambiguities in sentences involving pronoun references.
|
||||
|
||||
## Credits
|
||||
[LLaMa C++][llm] has a large and vibrant community of [contributors][llm-ctt] (~1,600 last time I checked) that actively maintain and extend its functionality, adding new models and architectures almost as fast as they appear. Considering the breakneck speed at which the AI/ML field is advancing, this alone is a remarkable feat!
|
||||
|
||||
While I'm grateful to all contributors, I want to recognise three in particular:
|
||||
* [Colin Kealty][btk] (Bartowski), for the many contributions and for being one of the best sources of high-quality quantized models available on Hugging Face
|
||||
* [Georgi Gerganov][ggg] for his amazing work with **llama.cpp** and the **ggml/gguf** libraries
|
||||
* [Iwan Kawrakow][ikk] for being one of the key authors behind the many quantization algorithms and the imatrix functionality.
|
||||
|
||||
[arc]: https://llm-stats.com/benchmarks/ai2-reasoning-challenge-(arc)
|
||||
[base]: https://huggingface.co/ibm-granite/granite-4.1-8b
|
||||
[b-q4km]: https://huggingface.co/bartowski
|
||||
[bch]: https://github.com/ggml-org/llama.cpp/tree/master/tools/llama-bench
|
||||
[bf16]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format
|
||||
[bpw]: https://github.com/ggml-org/llama.cpp/discussions/18531
|
||||
[btk]: https://huggingface.co/bartowski
|
||||
[ggf]: https://huggingface.co/docs/hub/en/gguf
|
||||
[ggg]: https://github.com/ggerganov
|
||||
[gh]: https://github.com/EAddario/llama.cpp/tree/master
|
||||
[gpqa]: https://arxiv.org/abs/2311.12022
|
||||
[gpqa-dat]: https://huggingface.co/datasets/eaddario/benchmark
|
||||
[hsw-tst]: https://github.com/klosax/hellaswag_text_data
|
||||
[hsw]: https://rowanzellers.com/hellaswag
|
||||
[ical]: https://huggingface.co/datasets/eaddario/imatrix-calibration
|
||||
[ikk]: https://github.com/ikawrakow
|
||||
[imtx-pr]: https://github.com/ggml-org/llama.cpp/pull/14891
|
||||
[imx-dat]: https://huggingface.co/eaddario/granite-4.1-8b-GGUF/tree/main/imatrix
|
||||
[imx]: https://github.com/EAddario/llama.cpp/tree/imatrix
|
||||
[kld]: https://en.wikipedia.org/wiki/Kullback–Leibler_divergence
|
||||
[lgt]: https://huggingface.co/eaddario/granite-4.1-8b-GGUF/tree/main/logits
|
||||
[llm-ctt]: https://github.com/ggml-org/llama.cpp/graphs/contributors
|
||||
[llm-rel]: https://github.com/ggml-org/llama.cpp/releases/tag/b9358
|
||||
[llm]: https://github.com/ggerganov/llama.cpp
|
||||
[mdl]: https://huggingface.co/ibm-granite/granite-4.1-8b
|
||||
[mmlu]: https://en.wikipedia.org/wiki/MMLU
|
||||
[mrdx]: https://huggingface.co/datasets/Green-Sky/mmlu-redux-2.0-for-llama.cpp
|
||||
[ppl]: https://github.com/ggml-org/llama.cpp/tree/master/tools/perplexity
|
||||
[ppx]: https://huggingface.co/docs/transformers/en/perplexity
|
||||
[qtz-pr]: https://github.com/ggml-org/llama.cpp/pull/15550
|
||||
[qtz]: https://github.com/EAddario/llama.cpp/tree/quantize
|
||||
[sfts]: https://huggingface.co/docs/safetensors/en/index
|
||||
[tqa]: https://github.com/sylinrl/TruthfulQA
|
||||
[tst-dat]: https://huggingface.co/datasets/ikawrakow/validation-datasets-for-llama.cpp/tree/main
|
||||
[u-q4km]: https://huggingface.co/unsloth
|
||||
[ust-ai]: https://unsloth.ai
|
||||
[ust]: https://huggingface.co/unsloth
|
||||
[wki-dat]: https://huggingface.co/datasets/Salesforce/wikitext/tree/main/wikitext-2-raw-v1
|
||||
[wng-chl]: https://cdn.aaai.org/ocs/4492/4492-21843-1-PB.pdf
|
||||
[wng-tst]: https://huggingface.co/datasets/ikawrakow/winogrande-eval-for-llama.cpp/tree/main
|
||||
[wng]: https://winogrande.allenai.org
|
||||
3
granite-4.1-8b-F16.gguf
Normal file
3
granite-4.1-8b-F16.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:041a4adb5827786916837f193e624ac4fced3e7c0eb9458708f8e39a50c80640
|
||||
size 17587417856
|
||||
3
granite-4.1-8b-Q1_L.gguf
Normal file
3
granite-4.1-8b-Q1_L.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4dc3864de95c7f4d5568718625e58cf54d4046545d2bf397eb80970f0bfc7487
|
||||
size 1926722560
|
||||
3
granite-4.1-8b-Q2_K.gguf
Normal file
3
granite-4.1-8b-Q2_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c149301db1c2da459842becd06edee4d695bdaa98ab19b2e72a8e77dca283a6f
|
||||
size 2750919680
|
||||
3
granite-4.1-8b-Q3_K.gguf
Normal file
3
granite-4.1-8b-Q3_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:60b1874b8af548c0e7e265e025a1c0847afce50529defa47e090b2930cc0f2c1
|
||||
size 3849876480
|
||||
3
granite-4.1-8b-Q4_K.gguf
Normal file
3
granite-4.1-8b-Q4_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2253b2e0b33a5ae20ea3acfe4e2a0cc44e1ead7ca440997db710afc1f2e63005
|
||||
size 4948784128
|
||||
3
granite-4.1-8b-Q5_K.gguf
Normal file
3
granite-4.1-8b-Q5_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:00f2df50b6bdb0b2e6c510f71cb502192b5271e55880f8a2a44a9d6736c5813d
|
||||
size 6047708160
|
||||
3
granite-4.1-8b-Q6_K.gguf
Normal file
3
granite-4.1-8b-Q6_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:292a55c8ffd7ae6daf0db87a4399c70ed557158766a17f21ae528ab6bf10a222
|
||||
size 7146566656
|
||||
3
granite-4.1-8b-Q7_K.gguf
Normal file
3
granite-4.1-8b-Q7_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1a6e17af4482a0358522d65c8f0776605bdc565beaeefe998cb3975ef06a722d
|
||||
size 8245441536
|
||||
3
granite-4.1-8b-Q8_0.gguf
Normal file
3
granite-4.1-8b-Q8_0.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7dcd5c8a7650336bdce6628d5e7645d33f73c0a3da6a679251d9b48d624a66d2
|
||||
size 9343267840
|
||||
3
imatrix/imatrix-granite-4.1-8b-medium.gguf
Normal file
3
imatrix/imatrix-granite-4.1-8b-medium.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:884c70cf05568063f8e14655e063ead2ad52546f1421caab3a4ad0b4cc648601
|
||||
size 12054912
|
||||
3
logits/granite-4.1-8b.logits
Normal file
3
logits/granite-4.1-8b.logits
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:57a822d9464e7d5cf600bf914065cb083783e20355f566609d0e2e0059b13e4f
|
||||
size 28905288788
|
||||
1247
scores/granite-4.1-8b-Q1_L.md
Normal file
1247
scores/granite-4.1-8b-Q1_L.md
Normal file
File diff suppressed because it is too large
Load Diff
1247
scores/granite-4.1-8b-Q2_K.md
Normal file
1247
scores/granite-4.1-8b-Q2_K.md
Normal file
File diff suppressed because it is too large
Load Diff
1247
scores/granite-4.1-8b-Q3_K.md
Normal file
1247
scores/granite-4.1-8b-Q3_K.md
Normal file
File diff suppressed because it is too large
Load Diff
1247
scores/granite-4.1-8b-Q4_K.md
Normal file
1247
scores/granite-4.1-8b-Q4_K.md
Normal file
File diff suppressed because it is too large
Load Diff
1247
scores/granite-4.1-8b-Q5_K.md
Normal file
1247
scores/granite-4.1-8b-Q5_K.md
Normal file
File diff suppressed because it is too large
Load Diff
1247
scores/granite-4.1-8b-Q6_K.md
Normal file
1247
scores/granite-4.1-8b-Q6_K.md
Normal file
File diff suppressed because it is too large
Load Diff
1247
scores/granite-4.1-8b-Q7_K.md
Normal file
1247
scores/granite-4.1-8b-Q7_K.md
Normal file
File diff suppressed because it is too large
Load Diff
1247
scores/granite-4.1-8b-Q8_0.md
Normal file
1247
scores/granite-4.1-8b-Q8_0.md
Normal file
File diff suppressed because it is too large
Load Diff
18
scores/granite-4.1-8b-q1_l.arc
Normal file
18
scores/granite-4.1-8b-q1_l.arc
Normal file
@@ -0,0 +1,18 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 2 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 22 tensors
|
||||
llama_model_loader: - type iq1_s: 209 tensors
|
||||
llama_model_loader: - type iq2_s: 17 tensors
|
||||
llama_model_loader: - type iq1_m: 31 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ1_M - 1.75 bpw
|
||||
print_info: file size = 1.79 GiB (1.75 BPW)
|
||||
|
||||
multiple_choice_score: there are 869 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 869 tasks available
|
||||
multiple_choice_score : calculating ARC Challenge score over 750 tasks.
|
||||
|
||||
Final result: 36.5333 +/- 1.7594
|
||||
Random chance: 25.0083 +/- 1.5824
|
||||
18
scores/granite-4.1-8b-q1_l.gpqa
Normal file
18
scores/granite-4.1-8b-q1_l.gpqa
Normal file
@@ -0,0 +1,18 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 2 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 22 tensors
|
||||
llama_model_loader: - type iq1_s: 209 tensors
|
||||
llama_model_loader: - type iq2_s: 17 tensors
|
||||
llama_model_loader: - type iq1_m: 31 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ1_M - 1.75 bpw
|
||||
print_info: file size = 1.79 GiB (1.75 BPW)
|
||||
|
||||
multiple_choice_score: there are 198 tasks in prompt
|
||||
multiple_choice_score: reading tasks......................................................................................................................................................................................................done
|
||||
multiple_choice_score : calculating GPQA-Diamond score over 198 tasks.
|
||||
|
||||
Final result: 19.1919 +/- 2.8058
|
||||
Random chance: 24.5963 +/- 3.0683
|
||||
17
scores/granite-4.1-8b-q1_l.hsw
Normal file
17
scores/granite-4.1-8b-q1_l.hsw
Normal file
@@ -0,0 +1,17 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 2 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 22 tensors
|
||||
llama_model_loader: - type iq1_s: 209 tensors
|
||||
llama_model_loader: - type iq2_s: 17 tensors
|
||||
llama_model_loader: - type iq1_m: 31 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ1_M - 1.75 bpw
|
||||
print_info: file size = 1.79 GiB (1.75 BPW)
|
||||
|
||||
hellaswag_score : loaded 10042 tasks from prompt.
|
||||
hellaswag_score : selecting 750 randomized tasks.
|
||||
hellaswag_score : calculating hellaswag score over selected tasks.
|
||||
|
||||
750 36.00000000% [32.6441%, 39.4986%]
|
||||
18
scores/granite-4.1-8b-q1_l.mmlu
Normal file
18
scores/granite-4.1-8b-q1_l.mmlu
Normal file
@@ -0,0 +1,18 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 2 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 22 tensors
|
||||
llama_model_loader: - type iq1_s: 209 tensors
|
||||
llama_model_loader: - type iq2_s: 17 tensors
|
||||
llama_model_loader: - type iq1_m: 31 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ1_M - 1.75 bpw
|
||||
print_info: file size = 1.79 GiB (1.75 BPW)
|
||||
|
||||
multiple_choice_score: there are 5362 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 5362 tasks available
|
||||
multiple_choice_score : calculating MMLU-Redux score over 750 tasks.
|
||||
|
||||
Final result: 27.2000 +/- 1.6260
|
||||
Random chance: 25.0000 +/- 1.5822
|
||||
52
scores/granite-4.1-8b-q1_l.ppx
Normal file
52
scores/granite-4.1-8b-q1_l.ppx
Normal file
@@ -0,0 +1,52 @@
|
||||
Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 2 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 22 tensors
|
||||
llama_model_loader: - type iq1_s: 209 tensors
|
||||
llama_model_loader: - type iq2_s: 17 tensors
|
||||
llama_model_loader: - type iq1_m: 31 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ1_M - 1.75 bpw
|
||||
print_info: file size = 1.79 GiB (1.75 BPW)
|
||||
|
||||
====== Perplexity statistics ======
|
||||
Mean PPL(Q) : 87.318832 ± 0.781580
|
||||
Mean PPL(base) : 8.691178 ± 0.065443
|
||||
Cor(ln(PPL(Q)), ln(PPL(base))): 57.61%
|
||||
Mean ln(PPL(Q)/PPL(base)) : 2.307258 ± 0.007692
|
||||
Mean PPL(Q)/PPL(base) : 10.046835 ± 0.077278
|
||||
Mean PPL(Q)-PPL(base) : 78.627654 ± 0.745801
|
||||
|
||||
====== KL divergence statistics ======
|
||||
Mean KLD: 2.889523 ± 0.005948
|
||||
Maximum KLD: 18.836405
|
||||
99.9% KLD: 13.080500
|
||||
99.0% KLD: 10.150911
|
||||
95.0% KLD: 7.400433
|
||||
90.0% KLD: 6.028920
|
||||
Median KLD: 2.342337
|
||||
10.0% KLD: 0.519454
|
||||
5.0% KLD: 0.230382
|
||||
1.0% KLD: 0.035005
|
||||
0.1% KLD: 0.005322
|
||||
Minimum KLD: 0.000491
|
||||
|
||||
====== Token probability statistics ======
|
||||
Mean Δp: -31.301 ± 0.094 %
|
||||
Maximum Δp: 92.012%
|
||||
99.9% Δp: 58.470%
|
||||
99.0% Δp: 27.386%
|
||||
95.0% Δp: 4.750%
|
||||
90.0% Δp: 0.243%
|
||||
75.0% Δp: -0.866%
|
||||
Median Δp: -17.956%
|
||||
25.0% Δp: -60.731%
|
||||
10.0% Δp: -90.577%
|
||||
5.0% Δp: -97.636%
|
||||
1.0% Δp: -99.874%
|
||||
0.1% Δp: -99.991%
|
||||
Minimum Δp: -100.000%
|
||||
RMS Δp : 47.467 ± 0.088 %
|
||||
Same top p: 34.309 ± 0.125 %
|
||||
18
scores/granite-4.1-8b-q1_l.tqa
Normal file
18
scores/granite-4.1-8b-q1_l.tqa
Normal file
@@ -0,0 +1,18 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 2 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 22 tensors
|
||||
llama_model_loader: - type iq1_s: 209 tensors
|
||||
llama_model_loader: - type iq2_s: 17 tensors
|
||||
llama_model_loader: - type iq1_m: 31 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ1_M - 1.75 bpw
|
||||
print_info: file size = 1.79 GiB (1.75 BPW)
|
||||
|
||||
multiple_choice_score: there are 817 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 817 tasks available
|
||||
multiple_choice_score : calculating TruthfulQA score over 750 tasks.
|
||||
|
||||
Final result: 28.9333 +/- 1.6569
|
||||
Random chance: 19.8992 +/- 1.4588
|
||||
16
scores/granite-4.1-8b-q1_l.wng
Normal file
16
scores/granite-4.1-8b-q1_l.wng
Normal file
@@ -0,0 +1,16 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 2 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 22 tensors
|
||||
llama_model_loader: - type iq1_s: 209 tensors
|
||||
llama_model_loader: - type iq2_s: 17 tensors
|
||||
llama_model_loader: - type iq1_m: 31 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ1_M - 1.75 bpw
|
||||
print_info: file size = 1.79 GiB (1.75 BPW)
|
||||
|
||||
winogrande_score : loaded 1266 tasks from prompt.
|
||||
winogrande_score : selecting 750 random tasks
|
||||
|
||||
Final Winogrande score(750 tasks): 52.5333 +/- 1.8246
|
||||
21
scores/granite-4.1-8b-q2_k.arc
Normal file
21
scores/granite-4.1-8b-q2_k.arc
Normal file
@@ -0,0 +1,21 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 35 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type iq2_xxs: 41 tensors
|
||||
llama_model_loader: - type iq2_xs: 56 tensors
|
||||
llama_model_loader: - type iq3_xxs: 6 tensors
|
||||
llama_model_loader: - type iq1_s: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 38 tensors
|
||||
llama_model_loader: - type iq2_s: 99 tensors
|
||||
llama_model_loader: - type iq1_m: 4 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ2_S - 2.5 bpw
|
||||
print_info: file size = 2.56 GiB (2.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 869 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 869 tasks available
|
||||
multiple_choice_score : calculating ARC Challenge score over 750 tasks.
|
||||
|
||||
Final result: 60.4000 +/- 1.7870
|
||||
Random chance: 25.0083 +/- 1.5824
|
||||
21
scores/granite-4.1-8b-q2_k.gpqa
Normal file
21
scores/granite-4.1-8b-q2_k.gpqa
Normal file
@@ -0,0 +1,21 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 35 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type iq2_xxs: 41 tensors
|
||||
llama_model_loader: - type iq2_xs: 56 tensors
|
||||
llama_model_loader: - type iq3_xxs: 6 tensors
|
||||
llama_model_loader: - type iq1_s: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 38 tensors
|
||||
llama_model_loader: - type iq2_s: 99 tensors
|
||||
llama_model_loader: - type iq1_m: 4 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ2_S - 2.5 bpw
|
||||
print_info: file size = 2.56 GiB (2.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 198 tasks in prompt
|
||||
multiple_choice_score: reading tasks......................................................................................................................................................................................................done
|
||||
multiple_choice_score : calculating GPQA-Diamond score over 198 tasks.
|
||||
|
||||
Final result: 29.7980 +/- 3.2586
|
||||
Random chance: 24.5963 +/- 3.0683
|
||||
20
scores/granite-4.1-8b-q2_k.hsw
Normal file
20
scores/granite-4.1-8b-q2_k.hsw
Normal file
@@ -0,0 +1,20 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 35 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type iq2_xxs: 41 tensors
|
||||
llama_model_loader: - type iq2_xs: 56 tensors
|
||||
llama_model_loader: - type iq3_xxs: 6 tensors
|
||||
llama_model_loader: - type iq1_s: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 38 tensors
|
||||
llama_model_loader: - type iq2_s: 99 tensors
|
||||
llama_model_loader: - type iq1_m: 4 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ2_S - 2.5 bpw
|
||||
print_info: file size = 2.56 GiB (2.50 BPW)
|
||||
|
||||
hellaswag_score : loaded 10042 tasks from prompt.
|
||||
hellaswag_score : selecting 750 randomized tasks.
|
||||
hellaswag_score : calculating hellaswag score over selected tasks.
|
||||
|
||||
750 70.00000000% [66.6252%, 73.1710%]
|
||||
21
scores/granite-4.1-8b-q2_k.mmlu
Normal file
21
scores/granite-4.1-8b-q2_k.mmlu
Normal file
@@ -0,0 +1,21 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 35 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type iq2_xxs: 41 tensors
|
||||
llama_model_loader: - type iq2_xs: 56 tensors
|
||||
llama_model_loader: - type iq3_xxs: 6 tensors
|
||||
llama_model_loader: - type iq1_s: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 38 tensors
|
||||
llama_model_loader: - type iq2_s: 99 tensors
|
||||
llama_model_loader: - type iq1_m: 4 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ2_S - 2.5 bpw
|
||||
print_info: file size = 2.56 GiB (2.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 5362 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 5362 tasks available
|
||||
multiple_choice_score : calculating MMLU-Redux score over 750 tasks.
|
||||
|
||||
Final result: 59.2000 +/- 1.7958
|
||||
Random chance: 25.0000 +/- 1.5822
|
||||
55
scores/granite-4.1-8b-q2_k.ppx
Normal file
55
scores/granite-4.1-8b-q2_k.ppx
Normal file
@@ -0,0 +1,55 @@
|
||||
Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 35 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type iq2_xxs: 41 tensors
|
||||
llama_model_loader: - type iq2_xs: 56 tensors
|
||||
llama_model_loader: - type iq3_xxs: 6 tensors
|
||||
llama_model_loader: - type iq1_s: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 38 tensors
|
||||
llama_model_loader: - type iq2_s: 99 tensors
|
||||
llama_model_loader: - type iq1_m: 4 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ2_S - 2.5 bpw
|
||||
print_info: file size = 2.56 GiB (2.50 BPW)
|
||||
|
||||
====== Perplexity statistics ======
|
||||
Mean PPL(Q) : 12.534216 ± 0.095606
|
||||
Mean PPL(base) : 8.691178 ± 0.065443
|
||||
Cor(ln(PPL(Q)), ln(PPL(base))): 86.12%
|
||||
Mean ln(PPL(Q)/PPL(base)) : 0.366154 ± 0.003993
|
||||
Mean PPL(Q)/PPL(base) : 1.442177 ± 0.005759
|
||||
Mean PPL(Q)-PPL(base) : 3.843038 ± 0.051440
|
||||
|
||||
====== KL divergence statistics ======
|
||||
Mean KLD: 0.644965 ± 0.002755
|
||||
Maximum KLD: 18.493736
|
||||
99.9% KLD: 10.059598
|
||||
99.0% KLD: 5.407475
|
||||
95.0% KLD: 2.364230
|
||||
90.0% KLD: 1.477545
|
||||
Median KLD: 0.339213
|
||||
10.0% KLD: 0.010624
|
||||
5.0% KLD: 0.002467
|
||||
1.0% KLD: 0.000277
|
||||
0.1% KLD: 0.000030
|
||||
Minimum KLD: -0.000000
|
||||
|
||||
====== Token probability statistics ======
|
||||
Mean Δp: -7.810 ± 0.058 %
|
||||
Maximum Δp: 99.884%
|
||||
99.9% Δp: 73.625%
|
||||
99.0% Δp: 40.369%
|
||||
95.0% Δp: 16.588%
|
||||
90.0% Δp: 7.372%
|
||||
75.0% Δp: 0.201%
|
||||
Median Δp: -0.916%
|
||||
25.0% Δp: -11.370%
|
||||
10.0% Δp: -33.838%
|
||||
5.0% Δp: -54.602%
|
||||
1.0% Δp: -93.772%
|
||||
0.1% Δp: -99.844%
|
||||
Minimum Δp: -99.999%
|
||||
RMS Δp : 23.380 ± 0.085 %
|
||||
Same top p: 67.231 ± 0.124 %
|
||||
21
scores/granite-4.1-8b-q2_k.tqa
Normal file
21
scores/granite-4.1-8b-q2_k.tqa
Normal file
@@ -0,0 +1,21 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 35 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type iq2_xxs: 41 tensors
|
||||
llama_model_loader: - type iq2_xs: 56 tensors
|
||||
llama_model_loader: - type iq3_xxs: 6 tensors
|
||||
llama_model_loader: - type iq1_s: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 38 tensors
|
||||
llama_model_loader: - type iq2_s: 99 tensors
|
||||
llama_model_loader: - type iq1_m: 4 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ2_S - 2.5 bpw
|
||||
print_info: file size = 2.56 GiB (2.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 817 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 817 tasks available
|
||||
multiple_choice_score : calculating TruthfulQA score over 750 tasks.
|
||||
|
||||
Final result: 33.4667 +/- 1.7242
|
||||
Random chance: 19.8992 +/- 1.4588
|
||||
19
scores/granite-4.1-8b-q2_k.wng
Normal file
19
scores/granite-4.1-8b-q2_k.wng
Normal file
@@ -0,0 +1,19 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 35 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type iq2_xxs: 41 tensors
|
||||
llama_model_loader: - type iq2_xs: 56 tensors
|
||||
llama_model_loader: - type iq3_xxs: 6 tensors
|
||||
llama_model_loader: - type iq1_s: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 38 tensors
|
||||
llama_model_loader: - type iq2_s: 99 tensors
|
||||
llama_model_loader: - type iq1_m: 4 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ2_S - 2.5 bpw
|
||||
print_info: file size = 2.56 GiB (2.50 BPW)
|
||||
|
||||
winogrande_score : loaded 1266 tasks from prompt.
|
||||
winogrande_score : selecting 750 random tasks
|
||||
|
||||
Final Winogrande score(750 tasks): 65.2000 +/- 1.7405
|
||||
20
scores/granite-4.1-8b-q3_k.arc
Normal file
20
scores/granite-4.1-8b-q3_k.arc
Normal file
@@ -0,0 +1,20 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 1 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type q4_K: 33 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 2 tensors
|
||||
llama_model_loader: - type iq3_xxs: 31 tensors
|
||||
llama_model_loader: - type iq3_s: 165 tensors
|
||||
llama_model_loader: - type iq4_xs: 48 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ4_XS - 4.25 bpw
|
||||
print_info: file size = 3.58 GiB (3.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 869 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 869 tasks available
|
||||
multiple_choice_score : calculating ARC Challenge score over 750 tasks.
|
||||
|
||||
Final result: 62.0000 +/- 1.7736
|
||||
Random chance: 25.0083 +/- 1.5824
|
||||
20
scores/granite-4.1-8b-q3_k.gpqa
Normal file
20
scores/granite-4.1-8b-q3_k.gpqa
Normal file
@@ -0,0 +1,20 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 1 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type q4_K: 33 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 2 tensors
|
||||
llama_model_loader: - type iq3_xxs: 31 tensors
|
||||
llama_model_loader: - type iq3_s: 165 tensors
|
||||
llama_model_loader: - type iq4_xs: 48 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ4_XS - 4.25 bpw
|
||||
print_info: file size = 3.58 GiB (3.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 198 tasks in prompt
|
||||
multiple_choice_score: reading tasks......................................................................................................................................................................................................done
|
||||
multiple_choice_score : calculating GPQA-Diamond score over 198 tasks.
|
||||
|
||||
Final result: 21.7172 +/- 2.9377
|
||||
Random chance: 24.5963 +/- 3.0683
|
||||
19
scores/granite-4.1-8b-q3_k.hsw
Normal file
19
scores/granite-4.1-8b-q3_k.hsw
Normal file
@@ -0,0 +1,19 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 1 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type q4_K: 33 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 2 tensors
|
||||
llama_model_loader: - type iq3_xxs: 31 tensors
|
||||
llama_model_loader: - type iq3_s: 165 tensors
|
||||
llama_model_loader: - type iq4_xs: 48 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ4_XS - 4.25 bpw
|
||||
print_info: file size = 3.58 GiB (3.50 BPW)
|
||||
|
||||
hellaswag_score : loaded 10042 tasks from prompt.
|
||||
hellaswag_score : selecting 750 randomized tasks.
|
||||
hellaswag_score : calculating hellaswag score over selected tasks.
|
||||
|
||||
750 79.33333333% [76.2895%, 82.0782%]
|
||||
20
scores/granite-4.1-8b-q3_k.mmlu
Normal file
20
scores/granite-4.1-8b-q3_k.mmlu
Normal file
@@ -0,0 +1,20 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 1 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type q4_K: 33 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 2 tensors
|
||||
llama_model_loader: - type iq3_xxs: 31 tensors
|
||||
llama_model_loader: - type iq3_s: 165 tensors
|
||||
llama_model_loader: - type iq4_xs: 48 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ4_XS - 4.25 bpw
|
||||
print_info: file size = 3.58 GiB (3.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 5362 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 5362 tasks available
|
||||
multiple_choice_score : calculating MMLU-Redux score over 750 tasks.
|
||||
|
||||
Final result: 69.2000 +/- 1.6869
|
||||
Random chance: 25.0000 +/- 1.5822
|
||||
54
scores/granite-4.1-8b-q3_k.ppx
Normal file
54
scores/granite-4.1-8b-q3_k.ppx
Normal file
@@ -0,0 +1,54 @@
|
||||
Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 1 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type q4_K: 33 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 2 tensors
|
||||
llama_model_loader: - type iq3_xxs: 31 tensors
|
||||
llama_model_loader: - type iq3_s: 165 tensors
|
||||
llama_model_loader: - type iq4_xs: 48 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ4_XS - 4.25 bpw
|
||||
print_info: file size = 3.58 GiB (3.50 BPW)
|
||||
|
||||
====== Perplexity statistics ======
|
||||
Mean PPL(Q) : 9.381594 ± 0.070128
|
||||
Mean PPL(base) : 8.691178 ± 0.065443
|
||||
Cor(ln(PPL(Q)), ln(PPL(base))): 96.18%
|
||||
Mean ln(PPL(Q)/PPL(base)) : 0.076441 ± 0.002075
|
||||
Mean PPL(Q)/PPL(base) : 1.079439 ± 0.002240
|
||||
Mean PPL(Q)-PPL(base) : 0.690416 ± 0.019310
|
||||
|
||||
====== KL divergence statistics ======
|
||||
Mean KLD: 0.173887 ± 0.001079
|
||||
Maximum KLD: 14.743266
|
||||
99.9% KLD: 5.111659
|
||||
99.0% KLD: 1.928733
|
||||
95.0% KLD: 0.623000
|
||||
90.0% KLD: 0.364871
|
||||
Median KLD: 0.072451
|
||||
10.0% KLD: 0.001391
|
||||
5.0% KLD: 0.000306
|
||||
1.0% KLD: 0.000027
|
||||
0.1% KLD: 0.000001
|
||||
Minimum KLD: -0.000004
|
||||
|
||||
====== Token probability statistics ======
|
||||
Mean Δp: -2.503 ± 0.031 %
|
||||
Maximum Δp: 96.204%
|
||||
99.9% Δp: 57.084%
|
||||
99.0% Δp: 26.183%
|
||||
95.0% Δp: 10.344%
|
||||
90.0% Δp: 5.013%
|
||||
75.0% Δp: 0.347%
|
||||
Median Δp: -0.138%
|
||||
25.0% Δp: -3.676%
|
||||
10.0% Δp: -12.687%
|
||||
5.0% Δp: -21.160%
|
||||
1.0% Δp: -51.918%
|
||||
0.1% Δp: -92.971%
|
||||
Minimum Δp: -99.997%
|
||||
RMS Δp : 12.185 ± 0.069 %
|
||||
Same top p: 82.732 ± 0.100 %
|
||||
20
scores/granite-4.1-8b-q3_k.tqa
Normal file
20
scores/granite-4.1-8b-q3_k.tqa
Normal file
@@ -0,0 +1,20 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 1 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type q4_K: 33 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 2 tensors
|
||||
llama_model_loader: - type iq3_xxs: 31 tensors
|
||||
llama_model_loader: - type iq3_s: 165 tensors
|
||||
llama_model_loader: - type iq4_xs: 48 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ4_XS - 4.25 bpw
|
||||
print_info: file size = 3.58 GiB (3.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 817 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 817 tasks available
|
||||
multiple_choice_score : calculating TruthfulQA score over 750 tasks.
|
||||
|
||||
Final result: 39.6000 +/- 1.7870
|
||||
Random chance: 19.8992 +/- 1.4588
|
||||
18
scores/granite-4.1-8b-q3_k.wng
Normal file
18
scores/granite-4.1-8b-q3_k.wng
Normal file
@@ -0,0 +1,18 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q2_K: 1 tensors
|
||||
llama_model_loader: - type q3_K: 1 tensors
|
||||
llama_model_loader: - type q4_K: 33 tensors
|
||||
llama_model_loader: - type iq2_xxs: 1 tensors
|
||||
llama_model_loader: - type iq2_xs: 2 tensors
|
||||
llama_model_loader: - type iq3_xxs: 31 tensors
|
||||
llama_model_loader: - type iq3_s: 165 tensors
|
||||
llama_model_loader: - type iq4_xs: 48 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = IQ4_XS - 4.25 bpw
|
||||
print_info: file size = 3.58 GiB (3.50 BPW)
|
||||
|
||||
winogrande_score : loaded 1266 tasks from prompt.
|
||||
winogrande_score : selecting 750 random tasks
|
||||
|
||||
Final Winogrande score(750 tasks): 71.7333 +/- 1.6453
|
||||
17
scores/granite-4.1-8b-q4_k.arc
Normal file
17
scores/granite-4.1-8b-q4_k.arc
Normal file
@@ -0,0 +1,17 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q4_K: 90 tensors
|
||||
llama_model_loader: - type q5_K: 79 tensors
|
||||
llama_model_loader: - type iq3_xxs: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 1 tensors
|
||||
llama_model_loader: - type iq4_xs: 110 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.61 GiB (4.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 869 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 869 tasks available
|
||||
multiple_choice_score : calculating ARC Challenge score over 750 tasks.
|
||||
|
||||
Final result: 66.9333 +/- 1.7190
|
||||
Random chance: 25.0083 +/- 1.5824
|
||||
17
scores/granite-4.1-8b-q4_k.gpqa
Normal file
17
scores/granite-4.1-8b-q4_k.gpqa
Normal file
@@ -0,0 +1,17 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q4_K: 90 tensors
|
||||
llama_model_loader: - type q5_K: 79 tensors
|
||||
llama_model_loader: - type iq3_xxs: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 1 tensors
|
||||
llama_model_loader: - type iq4_xs: 110 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.61 GiB (4.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 198 tasks in prompt
|
||||
multiple_choice_score: reading tasks......................................................................................................................................................................................................done
|
||||
multiple_choice_score : calculating GPQA-Diamond score over 198 tasks.
|
||||
|
||||
Final result: 23.2323 +/- 3.0089
|
||||
Random chance: 24.5963 +/- 3.0683
|
||||
16
scores/granite-4.1-8b-q4_k.hsw
Normal file
16
scores/granite-4.1-8b-q4_k.hsw
Normal file
@@ -0,0 +1,16 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q4_K: 90 tensors
|
||||
llama_model_loader: - type q5_K: 79 tensors
|
||||
llama_model_loader: - type iq3_xxs: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 1 tensors
|
||||
llama_model_loader: - type iq4_xs: 110 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.61 GiB (4.50 BPW)
|
||||
|
||||
hellaswag_score : loaded 10042 tasks from prompt.
|
||||
hellaswag_score : selecting 750 randomized tasks.
|
||||
hellaswag_score : calculating hellaswag score over selected tasks.
|
||||
|
||||
750 79.73333333% [76.7082%, 82.4554%]
|
||||
17
scores/granite-4.1-8b-q4_k.mmlu
Normal file
17
scores/granite-4.1-8b-q4_k.mmlu
Normal file
@@ -0,0 +1,17 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q4_K: 90 tensors
|
||||
llama_model_loader: - type q5_K: 79 tensors
|
||||
llama_model_loader: - type iq3_xxs: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 1 tensors
|
||||
llama_model_loader: - type iq4_xs: 110 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.61 GiB (4.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 5362 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 5362 tasks available
|
||||
multiple_choice_score : calculating MMLU-Redux score over 750 tasks.
|
||||
|
||||
Final result: 71.4667 +/- 1.6500
|
||||
Random chance: 25.0000 +/- 1.5822
|
||||
51
scores/granite-4.1-8b-q4_k.ppx
Normal file
51
scores/granite-4.1-8b-q4_k.ppx
Normal file
@@ -0,0 +1,51 @@
|
||||
Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q4_K: 90 tensors
|
||||
llama_model_loader: - type q5_K: 79 tensors
|
||||
llama_model_loader: - type iq3_xxs: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 1 tensors
|
||||
llama_model_loader: - type iq4_xs: 110 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.61 GiB (4.50 BPW)
|
||||
|
||||
====== Perplexity statistics ======
|
||||
Mean PPL(Q) : 8.867438 ± 0.067303
|
||||
Mean PPL(base) : 8.691178 ± 0.065443
|
||||
Cor(ln(PPL(Q)), ln(PPL(base))): 98.88%
|
||||
Mean ln(PPL(Q)/PPL(base)) : 0.020077 ± 0.001134
|
||||
Mean PPL(Q)/PPL(base) : 1.020280 ± 0.001157
|
||||
Mean PPL(Q)-PPL(base) : 0.176260 ± 0.010114
|
||||
|
||||
====== KL divergence statistics ======
|
||||
Mean KLD: 0.047917 ± 0.000392
|
||||
Maximum KLD: 8.513770
|
||||
99.9% KLD: 1.939411
|
||||
99.0% KLD: 0.563371
|
||||
95.0% KLD: 0.167563
|
||||
90.0% KLD: 0.095562
|
||||
Median KLD: 0.017061
|
||||
10.0% KLD: 0.000210
|
||||
5.0% KLD: 0.000043
|
||||
1.0% KLD: 0.000003
|
||||
0.1% KLD: -0.000001
|
||||
Minimum KLD: -0.000004
|
||||
|
||||
====== Token probability statistics ======
|
||||
Mean Δp: -0.365 ± 0.017 %
|
||||
Maximum Δp: 95.922%
|
||||
99.9% Δp: 43.653%
|
||||
99.0% Δp: 16.925%
|
||||
95.0% Δp: 6.946%
|
||||
90.0% Δp: 3.742%
|
||||
75.0% Δp: 0.557%
|
||||
Median Δp: -0.002%
|
||||
25.0% Δp: -0.901%
|
||||
10.0% Δp: -4.570%
|
||||
5.0% Δp: -8.319%
|
||||
1.0% Δp: -22.260%
|
||||
0.1% Δp: -58.978%
|
||||
Minimum Δp: -98.363%
|
||||
RMS Δp : 6.446 ± 0.053 %
|
||||
Same top p: 90.937 ± 0.076 %
|
||||
17
scores/granite-4.1-8b-q4_k.tqa
Normal file
17
scores/granite-4.1-8b-q4_k.tqa
Normal file
@@ -0,0 +1,17 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q4_K: 90 tensors
|
||||
llama_model_loader: - type q5_K: 79 tensors
|
||||
llama_model_loader: - type iq3_xxs: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 1 tensors
|
||||
llama_model_loader: - type iq4_xs: 110 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.61 GiB (4.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 817 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 817 tasks available
|
||||
multiple_choice_score : calculating TruthfulQA score over 750 tasks.
|
||||
|
||||
Final result: 38.9333 +/- 1.7816
|
||||
Random chance: 19.8992 +/- 1.4588
|
||||
15
scores/granite-4.1-8b-q4_k.wng
Normal file
15
scores/granite-4.1-8b-q4_k.wng
Normal file
@@ -0,0 +1,15 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q4_K: 90 tensors
|
||||
llama_model_loader: - type q5_K: 79 tensors
|
||||
llama_model_loader: - type iq3_xxs: 2 tensors
|
||||
llama_model_loader: - type iq3_s: 1 tensors
|
||||
llama_model_loader: - type iq4_xs: 110 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q4_K - Medium
|
||||
print_info: file size = 4.61 GiB (4.50 BPW)
|
||||
|
||||
winogrande_score : loaded 1266 tasks from prompt.
|
||||
winogrande_score : selecting 750 random tasks
|
||||
|
||||
Final Winogrande score(750 tasks): 73.4667 +/- 1.6132
|
||||
17
scores/granite-4.1-8b-q5_k.arc
Normal file
17
scores/granite-4.1-8b-q5_k.arc
Normal file
@@ -0,0 +1,17 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 1 tensors
|
||||
llama_model_loader: - type q4_K: 26 tensors
|
||||
llama_model_loader: - type q5_K: 246 tensors
|
||||
llama_model_loader: - type q6_K: 4 tensors
|
||||
llama_model_loader: - type iq4_xs: 5 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q5_K - Medium
|
||||
print_info: file size = 5.63 GiB (5.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 869 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 869 tasks available
|
||||
multiple_choice_score : calculating ARC Challenge score over 750 tasks.
|
||||
|
||||
Final result: 66.4000 +/- 1.7259
|
||||
Random chance: 25.0083 +/- 1.5824
|
||||
17
scores/granite-4.1-8b-q5_k.gpqa
Normal file
17
scores/granite-4.1-8b-q5_k.gpqa
Normal file
@@ -0,0 +1,17 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 1 tensors
|
||||
llama_model_loader: - type q4_K: 26 tensors
|
||||
llama_model_loader: - type q5_K: 246 tensors
|
||||
llama_model_loader: - type q6_K: 4 tensors
|
||||
llama_model_loader: - type iq4_xs: 5 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q5_K - Medium
|
||||
print_info: file size = 5.63 GiB (5.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 198 tasks in prompt
|
||||
multiple_choice_score: reading tasks......................................................................................................................................................................................................done
|
||||
multiple_choice_score : calculating GPQA-Diamond score over 198 tasks.
|
||||
|
||||
Final result: 22.7273 +/- 2.9858
|
||||
Random chance: 24.5963 +/- 3.0683
|
||||
16
scores/granite-4.1-8b-q5_k.hsw
Normal file
16
scores/granite-4.1-8b-q5_k.hsw
Normal file
@@ -0,0 +1,16 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 1 tensors
|
||||
llama_model_loader: - type q4_K: 26 tensors
|
||||
llama_model_loader: - type q5_K: 246 tensors
|
||||
llama_model_loader: - type q6_K: 4 tensors
|
||||
llama_model_loader: - type iq4_xs: 5 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q5_K - Medium
|
||||
print_info: file size = 5.63 GiB (5.50 BPW)
|
||||
|
||||
hellaswag_score : loaded 10042 tasks from prompt.
|
||||
hellaswag_score : selecting 750 randomized tasks.
|
||||
hellaswag_score : calculating hellaswag score over selected tasks.
|
||||
|
||||
750 79.86666667% [76.8479%, 82.5810%]
|
||||
17
scores/granite-4.1-8b-q5_k.mmlu
Normal file
17
scores/granite-4.1-8b-q5_k.mmlu
Normal file
@@ -0,0 +1,17 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 1 tensors
|
||||
llama_model_loader: - type q4_K: 26 tensors
|
||||
llama_model_loader: - type q5_K: 246 tensors
|
||||
llama_model_loader: - type q6_K: 4 tensors
|
||||
llama_model_loader: - type iq4_xs: 5 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q5_K - Medium
|
||||
print_info: file size = 5.63 GiB (5.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 5362 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 5362 tasks available
|
||||
multiple_choice_score : calculating MMLU-Redux score over 750 tasks.
|
||||
|
||||
Final result: 72.1333 +/- 1.6382
|
||||
Random chance: 25.0000 +/- 1.5822
|
||||
51
scores/granite-4.1-8b-q5_k.ppx
Normal file
51
scores/granite-4.1-8b-q5_k.ppx
Normal file
@@ -0,0 +1,51 @@
|
||||
Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 1 tensors
|
||||
llama_model_loader: - type q4_K: 26 tensors
|
||||
llama_model_loader: - type q5_K: 246 tensors
|
||||
llama_model_loader: - type q6_K: 4 tensors
|
||||
llama_model_loader: - type iq4_xs: 5 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q5_K - Medium
|
||||
print_info: file size = 5.63 GiB (5.50 BPW)
|
||||
|
||||
====== Perplexity statistics ======
|
||||
Mean PPL(Q) : 8.766150 ± 0.066421
|
||||
Mean PPL(base) : 8.691178 ± 0.065443
|
||||
Cor(ln(PPL(Q)), ln(PPL(base))): 99.48%
|
||||
Mean ln(PPL(Q)/PPL(base)) : 0.008589 ± 0.000770
|
||||
Mean PPL(Q)/PPL(base) : 1.008626 ± 0.000776
|
||||
Mean PPL(Q)-PPL(base) : 0.074972 ± 0.006775
|
||||
|
||||
====== KL divergence statistics ======
|
||||
Mean KLD: 0.018940 ± 0.000165
|
||||
Maximum KLD: 5.148437
|
||||
99.9% KLD: 0.762867
|
||||
99.0% KLD: 0.213410
|
||||
95.0% KLD: 0.065414
|
||||
90.0% KLD: 0.037975
|
||||
Median KLD: 0.006922
|
||||
10.0% KLD: 0.000082
|
||||
5.0% KLD: 0.000017
|
||||
1.0% KLD: 0.000001
|
||||
0.1% KLD: -0.000003
|
||||
Minimum KLD: -0.000006
|
||||
|
||||
====== Token probability statistics ======
|
||||
Mean Δp: -0.182 ± 0.011 %
|
||||
Maximum Δp: 91.879%
|
||||
99.9% Δp: 30.364%
|
||||
99.0% Δp: 10.955%
|
||||
95.0% Δp: 4.408%
|
||||
90.0% Δp: 2.363%
|
||||
75.0% Δp: 0.367%
|
||||
Median Δp: -0.001%
|
||||
25.0% Δp: -0.575%
|
||||
10.0% Δp: -2.918%
|
||||
5.0% Δp: -5.194%
|
||||
1.0% Δp: -12.983%
|
||||
0.1% Δp: -33.902%
|
||||
Minimum Δp: -90.554%
|
||||
RMS Δp : 4.061 ± 0.040 %
|
||||
Same top p: 94.120 ± 0.062 %
|
||||
17
scores/granite-4.1-8b-q5_k.tqa
Normal file
17
scores/granite-4.1-8b-q5_k.tqa
Normal file
@@ -0,0 +1,17 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 1 tensors
|
||||
llama_model_loader: - type q4_K: 26 tensors
|
||||
llama_model_loader: - type q5_K: 246 tensors
|
||||
llama_model_loader: - type q6_K: 4 tensors
|
||||
llama_model_loader: - type iq4_xs: 5 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q5_K - Medium
|
||||
print_info: file size = 5.63 GiB (5.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 817 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 817 tasks available
|
||||
multiple_choice_score : calculating TruthfulQA score over 750 tasks.
|
||||
|
||||
Final result: 38.5333 +/- 1.7783
|
||||
Random chance: 19.8992 +/- 1.4588
|
||||
15
scores/granite-4.1-8b-q5_k.wng
Normal file
15
scores/granite-4.1-8b-q5_k.wng
Normal file
@@ -0,0 +1,15 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 1 tensors
|
||||
llama_model_loader: - type q4_K: 26 tensors
|
||||
llama_model_loader: - type q5_K: 246 tensors
|
||||
llama_model_loader: - type q6_K: 4 tensors
|
||||
llama_model_loader: - type iq4_xs: 5 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q5_K - Medium
|
||||
print_info: file size = 5.63 GiB (5.50 BPW)
|
||||
|
||||
winogrande_score : loaded 1266 tasks from prompt.
|
||||
winogrande_score : selecting 750 random tasks
|
||||
|
||||
Final Winogrande score(750 tasks): 73.4667 +/- 1.6132
|
||||
16
scores/granite-4.1-8b-q6_k.arc
Normal file
16
scores/granite-4.1-8b-q6_k.arc
Normal file
@@ -0,0 +1,16 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q8_0: 35 tensors
|
||||
llama_model_loader: - type q4_K: 2 tensors
|
||||
llama_model_loader: - type q5_K: 75 tensors
|
||||
llama_model_loader: - type q6_K: 170 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q6_K
|
||||
print_info: file size = 6.65 GiB (6.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 869 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 869 tasks available
|
||||
multiple_choice_score : calculating ARC Challenge score over 750 tasks.
|
||||
|
||||
Final result: 67.0667 +/- 1.7172
|
||||
Random chance: 25.0083 +/- 1.5824
|
||||
16
scores/granite-4.1-8b-q6_k.gpqa
Normal file
16
scores/granite-4.1-8b-q6_k.gpqa
Normal file
@@ -0,0 +1,16 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q8_0: 35 tensors
|
||||
llama_model_loader: - type q4_K: 2 tensors
|
||||
llama_model_loader: - type q5_K: 75 tensors
|
||||
llama_model_loader: - type q6_K: 170 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q6_K
|
||||
print_info: file size = 6.65 GiB (6.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 198 tasks in prompt
|
||||
multiple_choice_score: reading tasks......................................................................................................................................................................................................done
|
||||
multiple_choice_score : calculating GPQA-Diamond score over 198 tasks.
|
||||
|
||||
Final result: 24.7475 +/- 3.0746
|
||||
Random chance: 24.5963 +/- 3.0683
|
||||
15
scores/granite-4.1-8b-q6_k.hsw
Normal file
15
scores/granite-4.1-8b-q6_k.hsw
Normal file
@@ -0,0 +1,15 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q8_0: 35 tensors
|
||||
llama_model_loader: - type q4_K: 2 tensors
|
||||
llama_model_loader: - type q5_K: 75 tensors
|
||||
llama_model_loader: - type q6_K: 170 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q6_K
|
||||
print_info: file size = 6.65 GiB (6.50 BPW)
|
||||
|
||||
hellaswag_score : loaded 10042 tasks from prompt.
|
||||
hellaswag_score : selecting 750 randomized tasks.
|
||||
hellaswag_score : calculating hellaswag score over selected tasks.
|
||||
|
||||
750 80.13333333% [77.1274%, 82.8322%]
|
||||
16
scores/granite-4.1-8b-q6_k.mmlu
Normal file
16
scores/granite-4.1-8b-q6_k.mmlu
Normal file
@@ -0,0 +1,16 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q8_0: 35 tensors
|
||||
llama_model_loader: - type q4_K: 2 tensors
|
||||
llama_model_loader: - type q5_K: 75 tensors
|
||||
llama_model_loader: - type q6_K: 170 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q6_K
|
||||
print_info: file size = 6.65 GiB (6.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 5362 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 5362 tasks available
|
||||
multiple_choice_score : calculating MMLU-Redux score over 750 tasks.
|
||||
|
||||
Final result: 72.6667 +/- 1.6284
|
||||
Random chance: 25.0000 +/- 1.5822
|
||||
50
scores/granite-4.1-8b-q6_k.ppx
Normal file
50
scores/granite-4.1-8b-q6_k.ppx
Normal file
@@ -0,0 +1,50 @@
|
||||
Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q8_0: 35 tensors
|
||||
llama_model_loader: - type q4_K: 2 tensors
|
||||
llama_model_loader: - type q5_K: 75 tensors
|
||||
llama_model_loader: - type q6_K: 170 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q6_K
|
||||
print_info: file size = 6.65 GiB (6.50 BPW)
|
||||
|
||||
====== Perplexity statistics ======
|
||||
Mean PPL(Q) : 8.755199 ± 0.066400
|
||||
Mean PPL(base) : 8.691178 ± 0.065443
|
||||
Cor(ln(PPL(Q)), ln(PPL(base))): 99.74%
|
||||
Mean ln(PPL(Q)/PPL(base)) : 0.007339 ± 0.000550
|
||||
Mean PPL(Q)/PPL(base) : 1.007366 ± 0.000554
|
||||
Mean PPL(Q)-PPL(base) : 0.064021 ± 0.004871
|
||||
|
||||
====== KL divergence statistics ======
|
||||
Mean KLD: 0.007326 ± 0.000066
|
||||
Maximum KLD: 2.273445
|
||||
99.9% KLD: 0.273010
|
||||
99.0% KLD: 0.081142
|
||||
95.0% KLD: 0.025136
|
||||
90.0% KLD: 0.014795
|
||||
Median KLD: 0.002831
|
||||
10.0% KLD: 0.000033
|
||||
5.0% KLD: 0.000007
|
||||
1.0% KLD: 0.000000
|
||||
0.1% KLD: -0.000003
|
||||
Minimum KLD: -0.000027
|
||||
|
||||
====== Token probability statistics ======
|
||||
Mean Δp: -0.107 ± 0.007 %
|
||||
Maximum Δp: 79.111%
|
||||
99.9% Δp: 18.154%
|
||||
99.0% Δp: 7.012%
|
||||
95.0% Δp: 2.856%
|
||||
90.0% Δp: 1.538%
|
||||
75.0% Δp: 0.232%
|
||||
Median Δp: -0.001%
|
||||
25.0% Δp: -0.371%
|
||||
10.0% Δp: -1.892%
|
||||
5.0% Δp: -3.299%
|
||||
1.0% Δp: -7.834%
|
||||
0.1% Δp: -21.254%
|
||||
Minimum Δp: -87.374%
|
||||
RMS Δp : 2.562 ± 0.030 %
|
||||
Same top p: 96.165 ± 0.051 %
|
||||
16
scores/granite-4.1-8b-q6_k.tqa
Normal file
16
scores/granite-4.1-8b-q6_k.tqa
Normal file
@@ -0,0 +1,16 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q8_0: 35 tensors
|
||||
llama_model_loader: - type q4_K: 2 tensors
|
||||
llama_model_loader: - type q5_K: 75 tensors
|
||||
llama_model_loader: - type q6_K: 170 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q6_K
|
||||
print_info: file size = 6.65 GiB (6.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 817 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 817 tasks available
|
||||
multiple_choice_score : calculating TruthfulQA score over 750 tasks.
|
||||
|
||||
Final result: 38.2667 +/- 1.7759
|
||||
Random chance: 19.8992 +/- 1.4588
|
||||
14
scores/granite-4.1-8b-q6_k.wng
Normal file
14
scores/granite-4.1-8b-q6_k.wng
Normal file
@@ -0,0 +1,14 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q8_0: 35 tensors
|
||||
llama_model_loader: - type q4_K: 2 tensors
|
||||
llama_model_loader: - type q5_K: 75 tensors
|
||||
llama_model_loader: - type q6_K: 170 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q6_K
|
||||
print_info: file size = 6.65 GiB (6.50 BPW)
|
||||
|
||||
winogrande_score : loaded 1266 tasks from prompt.
|
||||
winogrande_score : selecting 750 random tasks
|
||||
|
||||
Final Winogrande score(750 tasks): 73.7333 +/- 1.6080
|
||||
15
scores/granite-4.1-8b-q7_k.arc
Normal file
15
scores/granite-4.1-8b-q7_k.arc
Normal file
@@ -0,0 +1,15 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 4 tensors
|
||||
llama_model_loader: - type q8_0: 146 tensors
|
||||
llama_model_loader: - type q6_K: 132 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 7.68 GiB (7.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 869 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 869 tasks available
|
||||
multiple_choice_score : calculating ARC Challenge score over 750 tasks.
|
||||
|
||||
Final result: 66.4000 +/- 1.7259
|
||||
Random chance: 25.0083 +/- 1.5824
|
||||
15
scores/granite-4.1-8b-q7_k.gpqa
Normal file
15
scores/granite-4.1-8b-q7_k.gpqa
Normal file
@@ -0,0 +1,15 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 4 tensors
|
||||
llama_model_loader: - type q8_0: 146 tensors
|
||||
llama_model_loader: - type q6_K: 132 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 7.68 GiB (7.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 198 tasks in prompt
|
||||
multiple_choice_score: reading tasks......................................................................................................................................................................................................done
|
||||
multiple_choice_score : calculating GPQA-Diamond score over 198 tasks.
|
||||
|
||||
Final result: 26.7677 +/- 3.1544
|
||||
Random chance: 24.5963 +/- 3.0683
|
||||
14
scores/granite-4.1-8b-q7_k.hsw
Normal file
14
scores/granite-4.1-8b-q7_k.hsw
Normal file
@@ -0,0 +1,14 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 4 tensors
|
||||
llama_model_loader: - type q8_0: 146 tensors
|
||||
llama_model_loader: - type q6_K: 132 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 7.68 GiB (7.50 BPW)
|
||||
|
||||
hellaswag_score : loaded 10042 tasks from prompt.
|
||||
hellaswag_score : selecting 750 randomized tasks.
|
||||
hellaswag_score : calculating hellaswag score over selected tasks.
|
||||
|
||||
750 80.26666667% [77.2672%, 82.9576%]
|
||||
15
scores/granite-4.1-8b-q7_k.mmlu
Normal file
15
scores/granite-4.1-8b-q7_k.mmlu
Normal file
@@ -0,0 +1,15 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 4 tensors
|
||||
llama_model_loader: - type q8_0: 146 tensors
|
||||
llama_model_loader: - type q6_K: 132 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 7.68 GiB (7.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 5362 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 5362 tasks available
|
||||
multiple_choice_score : calculating MMLU-Redux score over 750 tasks.
|
||||
|
||||
Final result: 72.1333 +/- 1.6382
|
||||
Random chance: 25.0000 +/- 1.5822
|
||||
49
scores/granite-4.1-8b-q7_k.ppx
Normal file
49
scores/granite-4.1-8b-q7_k.ppx
Normal file
@@ -0,0 +1,49 @@
|
||||
Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 4 tensors
|
||||
llama_model_loader: - type q8_0: 146 tensors
|
||||
llama_model_loader: - type q6_K: 132 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 7.68 GiB (7.50 BPW)
|
||||
|
||||
====== Perplexity statistics ======
|
||||
Mean PPL(Q) : 8.751241 ± 0.066500
|
||||
Mean PPL(base) : 8.691178 ± 0.065443
|
||||
Cor(ln(PPL(Q)), ln(PPL(base))): 99.82%
|
||||
Mean ln(PPL(Q)/PPL(base)) : 0.006887 ± 0.000464
|
||||
Mean PPL(Q)/PPL(base) : 1.006911 ± 0.000467
|
||||
Mean PPL(Q)-PPL(base) : 0.060063 ± 0.004141
|
||||
|
||||
====== KL divergence statistics ======
|
||||
Mean KLD: 0.003568 ± 0.000040
|
||||
Maximum KLD: 2.888946
|
||||
99.9% KLD: 0.140997
|
||||
99.0% KLD: 0.037132
|
||||
95.0% KLD: 0.011608
|
||||
90.0% KLD: 0.006961
|
||||
Median KLD: 0.001456
|
||||
10.0% KLD: 0.000015
|
||||
5.0% KLD: 0.000003
|
||||
1.0% KLD: -0.000000
|
||||
0.1% KLD: -0.000004
|
||||
Minimum KLD: -0.000012
|
||||
|
||||
====== Token probability statistics ======
|
||||
Mean Δp: -0.007 ± 0.005 %
|
||||
Maximum Δp: 81.280%
|
||||
99.9% Δp: 12.574%
|
||||
99.0% Δp: 4.968%
|
||||
95.0% Δp: 2.242%
|
||||
90.0% Δp: 1.259%
|
||||
75.0% Δp: 0.222%
|
||||
Median Δp: -0.000%
|
||||
25.0% Δp: -0.211%
|
||||
10.0% Δp: -1.245%
|
||||
5.0% Δp: -2.232%
|
||||
1.0% Δp: -5.277%
|
||||
0.1% Δp: -14.266%
|
||||
Minimum Δp: -57.371%
|
||||
RMS Δp : 1.812 ± 0.025 %
|
||||
Same top p: 97.235 ± 0.043 %
|
||||
15
scores/granite-4.1-8b-q7_k.tqa
Normal file
15
scores/granite-4.1-8b-q7_k.tqa
Normal file
@@ -0,0 +1,15 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 4 tensors
|
||||
llama_model_loader: - type q8_0: 146 tensors
|
||||
llama_model_loader: - type q6_K: 132 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 7.68 GiB (7.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 817 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 817 tasks available
|
||||
multiple_choice_score : calculating TruthfulQA score over 750 tasks.
|
||||
|
||||
Final result: 38.5333 +/- 1.7783
|
||||
Random chance: 19.8992 +/- 1.4588
|
||||
13
scores/granite-4.1-8b-q7_k.wng
Normal file
13
scores/granite-4.1-8b-q7_k.wng
Normal file
@@ -0,0 +1,13 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 4 tensors
|
||||
llama_model_loader: - type q8_0: 146 tensors
|
||||
llama_model_loader: - type q6_K: 132 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 7.68 GiB (7.50 BPW)
|
||||
|
||||
winogrande_score : loaded 1266 tasks from prompt.
|
||||
winogrande_score : selecting 750 random tasks
|
||||
|
||||
Final Winogrande score(750 tasks): 73.6000 +/- 1.6106
|
||||
16
scores/granite-4.1-8b-q8_0.arc
Normal file
16
scores/granite-4.1-8b-q8_0.arc
Normal file
@@ -0,0 +1,16 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 2 tensors
|
||||
llama_model_loader: - type q8_0: 266 tensors
|
||||
llama_model_loader: - type q6_K: 2 tensors
|
||||
llama_model_loader: - type bf16: 12 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 8.70 GiB (8.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 869 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 869 tasks available
|
||||
multiple_choice_score : calculating ARC Challenge score over 750 tasks.
|
||||
|
||||
Final result: 66.8000 +/- 1.7207
|
||||
Random chance: 25.0083 +/- 1.5824
|
||||
16
scores/granite-4.1-8b-q8_0.gpqa
Normal file
16
scores/granite-4.1-8b-q8_0.gpqa
Normal file
@@ -0,0 +1,16 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 2 tensors
|
||||
llama_model_loader: - type q8_0: 266 tensors
|
||||
llama_model_loader: - type q6_K: 2 tensors
|
||||
llama_model_loader: - type bf16: 12 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 8.70 GiB (8.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 198 tasks in prompt
|
||||
multiple_choice_score: reading tasks......................................................................................................................................................................................................done
|
||||
multiple_choice_score : calculating GPQA-Diamond score over 198 tasks.
|
||||
|
||||
Final result: 26.7677 +/- 3.1544
|
||||
Random chance: 24.5963 +/- 3.0683
|
||||
15
scores/granite-4.1-8b-q8_0.hsw
Normal file
15
scores/granite-4.1-8b-q8_0.hsw
Normal file
@@ -0,0 +1,15 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 2 tensors
|
||||
llama_model_loader: - type q8_0: 266 tensors
|
||||
llama_model_loader: - type q6_K: 2 tensors
|
||||
llama_model_loader: - type bf16: 12 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 8.70 GiB (8.50 BPW)
|
||||
|
||||
hellaswag_score : loaded 10042 tasks from prompt.
|
||||
hellaswag_score : selecting 750 randomized tasks.
|
||||
hellaswag_score : calculating hellaswag score over selected tasks.
|
||||
|
||||
750 80.53333333% [77.5470%, 83.2085%]
|
||||
16
scores/granite-4.1-8b-q8_0.mmlu
Normal file
16
scores/granite-4.1-8b-q8_0.mmlu
Normal file
@@ -0,0 +1,16 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 2 tensors
|
||||
llama_model_loader: - type q8_0: 266 tensors
|
||||
llama_model_loader: - type q6_K: 2 tensors
|
||||
llama_model_loader: - type bf16: 12 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 8.70 GiB (8.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 5362 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 5362 tasks available
|
||||
multiple_choice_score : calculating MMLU-Redux score over 750 tasks.
|
||||
|
||||
Final result: 72.4000 +/- 1.6334
|
||||
Random chance: 25.0000 +/- 1.5822
|
||||
50
scores/granite-4.1-8b-q8_0.ppx
Normal file
50
scores/granite-4.1-8b-q8_0.ppx
Normal file
@@ -0,0 +1,50 @@
|
||||
Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 2 tensors
|
||||
llama_model_loader: - type q8_0: 266 tensors
|
||||
llama_model_loader: - type q6_K: 2 tensors
|
||||
llama_model_loader: - type bf16: 12 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 8.70 GiB (8.50 BPW)
|
||||
|
||||
====== Perplexity statistics ======
|
||||
Mean PPL(Q) : 8.749119 ± 0.066517
|
||||
Mean PPL(base) : 8.691178 ± 0.065443
|
||||
Cor(ln(PPL(Q)), ln(PPL(base))): 99.85%
|
||||
Mean ln(PPL(Q)/PPL(base)) : 0.006644 ± 0.000418
|
||||
Mean PPL(Q)/PPL(base) : 1.006667 ± 0.000421
|
||||
Mean PPL(Q)-PPL(base) : 0.057941 ± 0.003751
|
||||
|
||||
====== KL divergence statistics ======
|
||||
Mean KLD: 0.002052 ± 0.000024
|
||||
Maximum KLD: 1.924665
|
||||
99.9% KLD: 0.070725
|
||||
99.0% KLD: 0.018533
|
||||
95.0% KLD: 0.006534
|
||||
90.0% KLD: 0.004148
|
||||
Median KLD: 0.000965
|
||||
10.0% KLD: 0.000008
|
||||
5.0% KLD: 0.000002
|
||||
1.0% KLD: -0.000001
|
||||
0.1% KLD: -0.000004
|
||||
Minimum KLD: -0.000013
|
||||
|
||||
====== Token probability statistics ======
|
||||
Mean Δp: 0.031 ± 0.004 %
|
||||
Maximum Δp: 67.287%
|
||||
99.9% Δp: 9.257%
|
||||
99.0% Δp: 4.075%
|
||||
95.0% Δp: 1.929%
|
||||
90.0% Δp: 1.101%
|
||||
75.0% Δp: 0.201%
|
||||
Median Δp: 0.000%
|
||||
25.0% Δp: -0.151%
|
||||
10.0% Δp: -0.973%
|
||||
5.0% Δp: -1.750%
|
||||
1.0% Δp: -3.980%
|
||||
0.1% Δp: -9.904%
|
||||
Minimum Δp: -53.630%
|
||||
RMS Δp : 1.396 ± 0.020 %
|
||||
Same top p: 97.749 ± 0.039 %
|
||||
16
scores/granite-4.1-8b-q8_0.tqa
Normal file
16
scores/granite-4.1-8b-q8_0.tqa
Normal file
@@ -0,0 +1,16 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 2 tensors
|
||||
llama_model_loader: - type q8_0: 266 tensors
|
||||
llama_model_loader: - type q6_K: 2 tensors
|
||||
llama_model_loader: - type bf16: 12 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 8.70 GiB (8.50 BPW)
|
||||
|
||||
multiple_choice_score: there are 817 tasks in prompt
|
||||
multiple_choice_score: selecting 750 random tasks from 817 tasks available
|
||||
multiple_choice_score : calculating TruthfulQA score over 750 tasks.
|
||||
|
||||
Final result: 38.4000 +/- 1.7771
|
||||
Random chance: 19.8992 +/- 1.4588
|
||||
14
scores/granite-4.1-8b-q8_0.wng
Normal file
14
scores/granite-4.1-8b-q8_0.wng
Normal file
@@ -0,0 +1,14 @@
|
||||
llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest))
|
||||
llama_model_loader: - type f32: 81 tensors
|
||||
llama_model_loader: - type q5_1: 2 tensors
|
||||
llama_model_loader: - type q8_0: 266 tensors
|
||||
llama_model_loader: - type q6_K: 2 tensors
|
||||
llama_model_loader: - type bf16: 12 tensors
|
||||
print_info: file format = GGUF V3 (latest)
|
||||
print_info: file type = Q8_0
|
||||
print_info: file size = 8.70 GiB (8.50 BPW)
|
||||
|
||||
winogrande_score : loaded 1266 tasks from prompt.
|
||||
winogrande_score : selecting 750 random tasks
|
||||
|
||||
Final Winogrande score(750 tasks): 73.2000 +/- 1.6184
|
||||
332
scores/granite-4.1-8b.itx
Normal file
332
scores/granite-4.1-8b.itx
Normal file
@@ -0,0 +1,332 @@
|
||||
ggml_cuda_init: found 1 CUDA devices (Total VRAM: 124610 MiB):
|
||||
Device 0: NVIDIA GB10, compute capability 12.1, VMM: yes, VRAM: 124610 MiB
|
||||
|
||||
Computing statistics for imatrix/imatrix-granite-4.1-8b-medium.gguf (280 tensors)
|
||||
|
||||
Layer Tensor Σ(Act²) Min Max μ σ % Active N Entropy E (norm) ZD CosSim
|
||||
=========================================================================================================================================================================
|
||||
39 attn_k 174643.00 2.0243 18914.7031 42.64 497.38 100.00% 4096 8.8904 74.09% 0.37% 0.8865
|
||||
38 attn_k 166087.69 0.0000 20050.7402 40.55 448.81 99.98% 4096 8.6133 71.78% 0.46% 0.9835
|
||||
37 attn_k 146614.73 0.7595 20756.5176 35.79 423.56 100.00% 4096 8.3834 69.86% 0.44% 0.9842
|
||||
36 attn_k 134513.23 0.5200 19003.3398 32.84 386.52 100.00% 4096 8.2079 68.40% 0.51% 0.9501
|
||||
35 attn_k 124035.38 1.3198 11533.5068 30.28 305.84 100.00% 4096 8.2914 69.09% 0.71% 0.9665
|
||||
34 attn_k 123896.47 0.5387 17763.3496 30.25 472.60 100.00% 4096 6.6833 55.69% 0.44% 0.9800
|
||||
32 attn_k 111128.92 0.1590 25305.4961 27.13 504.76 100.00% 4096 6.2867 52.39% 0.37% 0.9931
|
||||
30 attn_k 109705.93 0.0000 33505.8359 26.78 617.29 99.98% 4096 5.4138 45.11% 0.27% 0.9759
|
||||
33 attn_k 109045.48 0.9963 20537.8262 26.62 448.68 100.00% 4096 6.6424 55.35% 0.37% 0.9914
|
||||
29 attn_k 87316.21 0.0000 20308.4004 21.32 398.84 99.95% 4096 6.1991 51.66% 0.37% 0.9936
|
||||
31 attn_k 87185.37 0.0000 22471.5254 21.29 421.05 99.98% 4096 6.2130 51.78% 0.34% 0.9963
|
||||
28 attn_k 82016.98 0.0000 23024.2656 20.02 430.64 99.95% 4096 5.6830 47.36% 0.29% 0.9976
|
||||
27 attn_k 76821.73 0.0000 17911.5156 18.76 348.43 99.98% 4096 6.3444 52.87% 0.34% 0.9791
|
||||
16 attn_k 76133.66 0.0000 11891.1016 18.59 311.73 99.93% 4096 6.1208 51.01% 0.42% 0.9643
|
||||
23 attn_k 74987.37 0.0000 18502.6172 18.31 356.15 99.95% 4096 6.1167 50.97% 0.39% 0.9980
|
||||
22 attn_k 71226.52 0.0000 18282.5273 17.39 343.00 99.98% 4096 6.2861 52.38% 0.32% 0.9901
|
||||
21 attn_k 69479.59 0.0000 14531.5439 16.96 297.70 99.95% 4096 6.6280 55.23% 0.39% 0.9882
|
||||
20 attn_k 68286.74 0.0000 18747.7070 16.67 345.57 99.95% 4096 6.0506 50.42% 0.34% 0.9917
|
||||
25 attn_k 68221.13 0.0000 14562.2783 16.66 294.34 99.98% 4096 6.3805 53.17% 0.39% 0.9829
|
||||
7 attn_k 68092.71 0.0000 17005.3926 16.62 348.43 99.90% 4096 5.9826 49.85% 0.32% 0.9814
|
||||
18 attn_k 66295.24 0.0000 12255.0391 16.19 262.31 99.95% 4096 6.9157 57.63% 0.42% 0.9978
|
||||
26 attn_k 65291.02 0.0000 12545.3467 15.94 268.54 99.98% 4096 6.6067 55.06% 0.39% 0.9920
|
||||
13 attn_k 59896.29 0.0000 8712.0469 14.62 232.75 99.90% 4096 6.9162 57.63% 0.32% 0.9786
|
||||
24 attn_k 56921.47 0.0000 9241.3613 13.90 213.49 99.98% 4096 7.0602 58.84% 0.54% 0.9757
|
||||
17 attn_k 56760.55 0.0000 10113.2490 13.86 223.49 99.95% 4096 6.7278 56.06% 0.44% 0.9569
|
||||
15 attn_k 55194.11 0.0000 9697.7783 13.48 229.94 99.93% 4096 5.9460 49.55% 0.37% 0.9288
|
||||
14 attn_k 54794.45 0.0000 8291.7021 13.38 212.09 99.90% 4096 7.0371 58.64% 0.37% 0.9885
|
||||
6 attn_k 53044.36 0.0000 16721.8047 12.95 321.79 99.88% 4096 4.5291 37.74% 0.32% 0.9479
|
||||
19 attn_k 52733.22 0.0000 11920.8223 12.87 233.97 99.95% 4096 6.5100 54.25% 0.39% 0.9902
|
||||
8 attn_k 51986.04 0.0000 10429.2197 12.69 233.14 99.90% 4096 6.5500 54.58% 0.29% 0.9699
|
||||
9 attn_k 49641.15 0.0000 10131.9502 12.12 241.91 99.90% 4096 5.8877 49.06% 0.34% 0.9950
|
||||
12 attn_k 48607.36 0.0000 7781.5098 11.87 191.06 99.93% 4096 7.0521 58.77% 0.37% 0.9965
|
||||
4 attn_k 46316.68 0.0000 16862.5684 11.31 309.35 99.85% 4096 5.2333 43.61% 0.22% 0.7166
|
||||
5 attn_k 45620.26 0.0000 16766.0762 11.14 317.06 99.88% 4096 4.5011 37.51% 0.22% 0.9947
|
||||
11 attn_k 45317.79 0.0000 6714.2949 11.06 172.61 99.93% 4096 7.2487 60.41% 0.37% 0.9866
|
||||
10 attn_k 43706.77 0.0000 6960.2920 10.67 180.78 99.93% 4096 6.7438 56.20% 0.34% 0.9923
|
||||
2 attn_k 19532.52 0.0000 6018.6182 4.77 99.81 99.90% 4096 7.6636 63.86% 0.20% 0.3212
|
||||
3 attn_k 17324.65 0.0000 5779.3477 4.23 110.43 99.93% 4096 5.5684 46.40% 0.22% 0.9580
|
||||
1 attn_k 15856.24 0.0000 5183.1831 3.87 86.07 98.66% 4096 6.6712 55.59% 0.22% 0.3483
|
||||
0 attn_k 3049.93 0.0000 247.0233 0.74 7.67 97.14% 4096 6.8129 56.77% 1.12% 0.0000
|
||||
39 attn_output 34614.42 1.0529 359.3260 8.45 11.78 100.00% 4096 11.3365 94.47% 7.59% 0.3770
|
||||
38 attn_output 13420.36 0.2976 103.0346 3.28 4.13 100.00% 4096 11.3248 94.37% 8.98% 0.2804
|
||||
37 attn_output 8831.00 0.2297 108.9257 2.16 3.37 100.00% 4096 11.2679 93.90% 5.49% 0.2283
|
||||
36 attn_output 8013.26 0.1413 95.6638 1.96 3.93 100.00% 4096 11.0147 91.79% 4.32% 0.2457
|
||||
35 attn_output 3675.13 0.1878 48.8232 0.90 1.65 100.00% 4096 11.2243 93.54% 4.52% 0.2357
|
||||
34 attn_output 3483.17 0.0606 33.0821 0.85 1.59 100.00% 4096 11.1613 93.01% 3.96% 0.1202
|
||||
33 attn_output 3199.42 0.0368 114.1278 0.78 2.50 100.00% 4096 10.6861 89.05% 3.47% 0.1774
|
||||
31 attn_output 2234.35 0.0523 41.9673 0.55 1.30 100.00% 4096 10.8825 90.69% 5.00% 0.2217
|
||||
1 attn_output 1997.94 0.0106 18.5997 0.49 0.98 100.00% 4096 10.5931 88.28% 10.03% 0.0200
|
||||
2 attn_output 1738.62 0.0199 8.3539 0.42 0.55 100.00% 4096 11.1150 92.63% 16.77% 0.3314
|
||||
32 attn_output 1524.28 0.0124 13.4450 0.37 0.70 100.00% 4096 11.1955 93.30% 3.03% 0.1768
|
||||
30 attn_output 1314.30 0.0875 9.0597 0.32 0.51 100.00% 4096 11.3404 94.50% 3.64% 0.2404
|
||||
28 attn_output 1114.76 0.0367 12.3692 0.27 0.65 100.00% 4096 10.8974 90.81% 3.59% 0.1806
|
||||
3 attn_output 1080.53 0.0156 5.2009 0.26 0.36 100.00% 4096 11.1964 93.30% 10.28% 0.2966
|
||||
24 attn_output 914.61 0.0400 6.5156 0.22 0.32 100.00% 4096 11.3443 94.54% 6.69% 0.1864
|
||||
23 attn_output 900.86 0.0061 16.3921 0.22 0.55 100.00% 4096 10.9512 91.26% 3.27% 0.1833
|
||||
29 attn_output 868.15 0.0168 8.5233 0.21 0.45 100.00% 4096 11.0238 91.87% 4.83% 0.1848
|
||||
27 attn_output 855.07 0.0081 8.6770 0.21 0.41 100.00% 4096 11.1371 92.81% 3.39% 0.2051
|
||||
22 attn_output 812.40 0.0144 7.8408 0.20 0.37 100.00% 4096 11.1740 93.12% 3.88% 0.2016
|
||||
17 attn_output 711.99 0.0085 4.7093 0.17 0.28 100.00% 4096 11.1963 93.30% 4.81% 0.2343
|
||||
21 attn_output 709.29 0.0000 10.5542 0.17 0.37 100.00% 4096 10.9892 91.58% 4.57% 0.1817
|
||||
19 attn_output 707.10 0.0186 6.2341 0.17 0.23 100.00% 4096 11.4981 95.82% 3.22% 0.2283
|
||||
20 attn_output 703.72 0.0083 5.4485 0.17 0.28 100.00% 4096 11.2945 94.12% 3.93% 0.2992
|
||||
25 attn_output 656.68 0.0052 10.7580 0.16 0.38 100.00% 4096 11.0354 91.96% 2.08% 0.2214
|
||||
15 attn_output 646.34 0.0052 8.5272 0.16 0.29 100.00% 4096 11.0779 92.32% 6.45% 0.2046
|
||||
16 attn_output 610.13 0.0001 6.2649 0.15 0.35 100.00% 4096 10.7437 89.53% 3.83% 0.1555
|
||||
26 attn_output 609.93 0.0232 5.7634 0.15 0.28 100.00% 4096 11.2354 93.63% 3.52% 0.1915
|
||||
13 attn_output 548.35 0.0033 4.0648 0.13 0.21 100.00% 4096 10.8651 90.54% 12.60% 0.4666
|
||||
18 attn_output 516.94 0.0000 10.2987 0.13 0.28 99.76% 4096 11.0928 92.44% 2.69% 0.2541
|
||||
4 attn_output 453.05 0.0051 3.9059 0.11 0.15 100.00% 4096 11.3247 94.37% 8.91% 0.3225
|
||||
0 attn_output 447.61 0.0008 91.7077 0.11 1.72 100.00% 4096 6.9468 57.89% 0.73% 0.0000
|
||||
10 attn_output 439.24 0.0036 3.4716 0.11 0.26 100.00% 4096 10.4123 86.77% 7.06% 0.2021
|
||||
11 attn_output 401.22 0.0051 1.9410 0.10 0.15 100.00% 4096 11.0397 92.00% 10.16% 0.1149
|
||||
14 attn_output 350.10 0.0024 3.9673 0.09 0.14 100.00% 4096 11.0800 92.33% 6.81% 0.2386
|
||||
5 attn_output 296.64 0.0028 3.5316 0.07 0.11 100.00% 4096 11.1694 93.08% 6.15% 0.2990
|
||||
7 attn_output 240.15 0.0033 3.3802 0.06 0.11 100.00% 4096 11.1050 92.54% 5.96% 0.3540
|
||||
6 attn_output 218.34 0.0015 1.1016 0.05 0.07 100.00% 4096 11.2492 93.74% 10.18% 0.2733
|
||||
8 attn_output 208.74 0.0015 5.2777 0.05 0.12 100.00% 4096 10.9654 91.38% 5.47% 0.1650
|
||||
12 attn_output 208.53 0.0032 2.3583 0.05 0.07 100.00% 4096 11.4232 95.19% 9.01% 0.3528
|
||||
9 attn_output 151.65 0.0025 1.4596 0.04 0.05 100.00% 4096 11.3319 94.43% 6.32% 0.2434
|
||||
39 attn_q 174643.00 2.0243 18914.7031 42.64 497.38 100.00% 4096 8.8904 74.09% 0.37% 0.8865
|
||||
38 attn_q 166087.69 0.0000 20050.7402 40.55 448.81 99.98% 4096 8.6133 71.78% 0.46% 0.9835
|
||||
37 attn_q 146614.73 0.7595 20756.5176 35.79 423.56 100.00% 4096 8.3834 69.86% 0.44% 0.9842
|
||||
36 attn_q 134513.23 0.5200 19003.3398 32.84 386.52 100.00% 4096 8.2079 68.40% 0.51% 0.9501
|
||||
35 attn_q 124035.38 1.3198 11533.5068 30.28 305.84 100.00% 4096 8.2914 69.09% 0.71% 0.9665
|
||||
34 attn_q 123896.47 0.5387 17763.3496 30.25 472.60 100.00% 4096 6.6833 55.69% 0.44% 0.9800
|
||||
32 attn_q 111128.92 0.1590 25305.4961 27.13 504.76 100.00% 4096 6.2867 52.39% 0.37% 0.9931
|
||||
30 attn_q 109705.93 0.0000 33505.8359 26.78 617.29 99.98% 4096 5.4138 45.11% 0.27% 0.9759
|
||||
33 attn_q 109045.48 0.9963 20537.8262 26.62 448.68 100.00% 4096 6.6424 55.35% 0.37% 0.9914
|
||||
29 attn_q 87316.21 0.0000 20308.4004 21.32 398.84 99.95% 4096 6.1991 51.66% 0.37% 0.9936
|
||||
31 attn_q 87185.37 0.0000 22471.5254 21.29 421.05 99.98% 4096 6.2130 51.78% 0.34% 0.9963
|
||||
28 attn_q 82016.98 0.0000 23024.2656 20.02 430.64 99.95% 4096 5.6830 47.36% 0.29% 0.9976
|
||||
27 attn_q 76821.73 0.0000 17911.5156 18.76 348.43 99.98% 4096 6.3444 52.87% 0.34% 0.9791
|
||||
16 attn_q 76133.66 0.0000 11891.1016 18.59 311.73 99.93% 4096 6.1208 51.01% 0.42% 0.9643
|
||||
23 attn_q 74987.37 0.0000 18502.6172 18.31 356.15 99.95% 4096 6.1167 50.97% 0.39% 0.9980
|
||||
22 attn_q 71226.52 0.0000 18282.5273 17.39 343.00 99.98% 4096 6.2861 52.38% 0.32% 0.9901
|
||||
21 attn_q 69479.59 0.0000 14531.5439 16.96 297.70 99.95% 4096 6.6280 55.23% 0.39% 0.9882
|
||||
20 attn_q 68286.74 0.0000 18747.7070 16.67 345.57 99.95% 4096 6.0506 50.42% 0.34% 0.9917
|
||||
25 attn_q 68221.13 0.0000 14562.2783 16.66 294.34 99.98% 4096 6.3805 53.17% 0.39% 0.9829
|
||||
7 attn_q 68092.71 0.0000 17005.3926 16.62 348.43 99.90% 4096 5.9826 49.85% 0.32% 0.9814
|
||||
18 attn_q 66295.24 0.0000 12255.0391 16.19 262.31 99.95% 4096 6.9157 57.63% 0.42% 0.9978
|
||||
26 attn_q 65291.02 0.0000 12545.3467 15.94 268.54 99.98% 4096 6.6067 55.06% 0.39% 0.9920
|
||||
13 attn_q 59896.29 0.0000 8712.0469 14.62 232.75 99.90% 4096 6.9162 57.63% 0.32% 0.9786
|
||||
24 attn_q 56921.47 0.0000 9241.3613 13.90 213.49 99.98% 4096 7.0602 58.84% 0.54% 0.9757
|
||||
17 attn_q 56760.55 0.0000 10113.2490 13.86 223.49 99.95% 4096 6.7278 56.06% 0.44% 0.9569
|
||||
15 attn_q 55194.11 0.0000 9697.7783 13.48 229.94 99.93% 4096 5.9460 49.55% 0.37% 0.9288
|
||||
14 attn_q 54794.45 0.0000 8291.7021 13.38 212.09 99.90% 4096 7.0371 58.64% 0.37% 0.9885
|
||||
6 attn_q 53044.36 0.0000 16721.8047 12.95 321.79 99.88% 4096 4.5291 37.74% 0.32% 0.9479
|
||||
19 attn_q 52733.22 0.0000 11920.8223 12.87 233.97 99.95% 4096 6.5100 54.25% 0.39% 0.9902
|
||||
8 attn_q 51986.04 0.0000 10429.2197 12.69 233.14 99.90% 4096 6.5500 54.58% 0.29% 0.9699
|
||||
9 attn_q 49641.15 0.0000 10131.9502 12.12 241.91 99.90% 4096 5.8877 49.06% 0.34% 0.9950
|
||||
12 attn_q 48607.36 0.0000 7781.5098 11.87 191.06 99.93% 4096 7.0521 58.77% 0.37% 0.9965
|
||||
4 attn_q 46316.68 0.0000 16862.5684 11.31 309.35 99.85% 4096 5.2333 43.61% 0.22% 0.7166
|
||||
5 attn_q 45620.26 0.0000 16766.0762 11.14 317.06 99.88% 4096 4.5011 37.51% 0.22% 0.9947
|
||||
11 attn_q 45317.79 0.0000 6714.2949 11.06 172.61 99.93% 4096 7.2487 60.41% 0.37% 0.9866
|
||||
10 attn_q 43706.77 0.0000 6960.2920 10.67 180.78 99.93% 4096 6.7438 56.20% 0.34% 0.9923
|
||||
2 attn_q 19532.52 0.0000 6018.6182 4.77 99.81 99.90% 4096 7.6636 63.86% 0.20% 0.3212
|
||||
3 attn_q 17324.65 0.0000 5779.3477 4.23 110.43 99.93% 4096 5.5684 46.40% 0.22% 0.9580
|
||||
1 attn_q 15856.24 0.0000 5183.1831 3.87 86.07 98.66% 4096 6.6712 55.59% 0.22% 0.3483
|
||||
0 attn_q 3049.93 0.0000 247.0233 0.74 7.67 97.14% 4096 6.8129 56.77% 1.12% 0.0000
|
||||
39 attn_v 174643.00 2.0243 18914.7031 42.64 497.38 100.00% 4096 8.8904 74.09% 0.37% 0.8865
|
||||
38 attn_v 166087.69 0.0000 20050.7402 40.55 448.81 99.98% 4096 8.6133 71.78% 0.46% 0.9835
|
||||
37 attn_v 146614.73 0.7595 20756.5176 35.79 423.56 100.00% 4096 8.3834 69.86% 0.44% 0.9842
|
||||
36 attn_v 134513.23 0.5200 19003.3398 32.84 386.52 100.00% 4096 8.2079 68.40% 0.51% 0.9501
|
||||
35 attn_v 124035.38 1.3198 11533.5068 30.28 305.84 100.00% 4096 8.2914 69.09% 0.71% 0.9665
|
||||
34 attn_v 123896.47 0.5387 17763.3496 30.25 472.60 100.00% 4096 6.6833 55.69% 0.44% 0.9800
|
||||
32 attn_v 111128.92 0.1590 25305.4961 27.13 504.76 100.00% 4096 6.2867 52.39% 0.37% 0.9931
|
||||
30 attn_v 109705.93 0.0000 33505.8359 26.78 617.29 99.98% 4096 5.4138 45.11% 0.27% 0.9759
|
||||
33 attn_v 109045.48 0.9963 20537.8262 26.62 448.68 100.00% 4096 6.6424 55.35% 0.37% 0.9914
|
||||
29 attn_v 87316.21 0.0000 20308.4004 21.32 398.84 99.95% 4096 6.1991 51.66% 0.37% 0.9936
|
||||
31 attn_v 87185.37 0.0000 22471.5254 21.29 421.05 99.98% 4096 6.2130 51.78% 0.34% 0.9963
|
||||
28 attn_v 82016.98 0.0000 23024.2656 20.02 430.64 99.95% 4096 5.6830 47.36% 0.29% 0.9976
|
||||
27 attn_v 76821.73 0.0000 17911.5156 18.76 348.43 99.98% 4096 6.3444 52.87% 0.34% 0.9791
|
||||
16 attn_v 76133.66 0.0000 11891.1016 18.59 311.73 99.93% 4096 6.1208 51.01% 0.42% 0.9643
|
||||
23 attn_v 74987.37 0.0000 18502.6172 18.31 356.15 99.95% 4096 6.1167 50.97% 0.39% 0.9980
|
||||
22 attn_v 71226.52 0.0000 18282.5273 17.39 343.00 99.98% 4096 6.2861 52.38% 0.32% 0.9901
|
||||
21 attn_v 69479.59 0.0000 14531.5439 16.96 297.70 99.95% 4096 6.6280 55.23% 0.39% 0.9882
|
||||
20 attn_v 68286.74 0.0000 18747.7070 16.67 345.57 99.95% 4096 6.0506 50.42% 0.34% 0.9917
|
||||
25 attn_v 68221.13 0.0000 14562.2783 16.66 294.34 99.98% 4096 6.3805 53.17% 0.39% 0.9829
|
||||
7 attn_v 68092.71 0.0000 17005.3926 16.62 348.43 99.90% 4096 5.9826 49.85% 0.32% 0.9814
|
||||
18 attn_v 66295.24 0.0000 12255.0391 16.19 262.31 99.95% 4096 6.9157 57.63% 0.42% 0.9978
|
||||
26 attn_v 65291.02 0.0000 12545.3467 15.94 268.54 99.98% 4096 6.6067 55.06% 0.39% 0.9920
|
||||
13 attn_v 59896.29 0.0000 8712.0469 14.62 232.75 99.90% 4096 6.9162 57.63% 0.32% 0.9786
|
||||
24 attn_v 56921.47 0.0000 9241.3613 13.90 213.49 99.98% 4096 7.0602 58.84% 0.54% 0.9757
|
||||
17 attn_v 56760.55 0.0000 10113.2490 13.86 223.49 99.95% 4096 6.7278 56.06% 0.44% 0.9569
|
||||
15 attn_v 55194.11 0.0000 9697.7783 13.48 229.94 99.93% 4096 5.9460 49.55% 0.37% 0.9288
|
||||
14 attn_v 54794.45 0.0000 8291.7021 13.38 212.09 99.90% 4096 7.0371 58.64% 0.37% 0.9885
|
||||
6 attn_v 53044.36 0.0000 16721.8047 12.95 321.79 99.88% 4096 4.5291 37.74% 0.32% 0.9479
|
||||
19 attn_v 52733.22 0.0000 11920.8223 12.87 233.97 99.95% 4096 6.5100 54.25% 0.39% 0.9902
|
||||
8 attn_v 51986.04 0.0000 10429.2197 12.69 233.14 99.90% 4096 6.5500 54.58% 0.29% 0.9699
|
||||
9 attn_v 49641.15 0.0000 10131.9502 12.12 241.91 99.90% 4096 5.8877 49.06% 0.34% 0.9950
|
||||
12 attn_v 48607.36 0.0000 7781.5098 11.87 191.06 99.93% 4096 7.0521 58.77% 0.37% 0.9965
|
||||
4 attn_v 46316.68 0.0000 16862.5684 11.31 309.35 99.85% 4096 5.2333 43.61% 0.22% 0.7166
|
||||
5 attn_v 45620.26 0.0000 16766.0762 11.14 317.06 99.88% 4096 4.5011 37.51% 0.22% 0.9947
|
||||
11 attn_v 45317.79 0.0000 6714.2949 11.06 172.61 99.93% 4096 7.2487 60.41% 0.37% 0.9866
|
||||
10 attn_v 43706.77 0.0000 6960.2920 10.67 180.78 99.93% 4096 6.7438 56.20% 0.34% 0.9923
|
||||
2 attn_v 19532.52 0.0000 6018.6182 4.77 99.81 99.90% 4096 7.6636 63.86% 0.20% 0.3212
|
||||
3 attn_v 17324.65 0.0000 5779.3477 4.23 110.43 99.93% 4096 5.5684 46.40% 0.22% 0.9580
|
||||
1 attn_v 15856.24 0.0000 5183.1831 3.87 86.07 98.66% 4096 6.6712 55.59% 0.22% 0.3483
|
||||
0 attn_v 3049.93 0.0000 247.0233 0.74 7.67 97.14% 4096 6.8129 56.77% 1.12% 0.0000
|
||||
39 ffn_down 1645556.38 0.0689 81837.1094 128.56 1422.92 100.00% 12800 9.8110 71.91% 1.00% 0.0173
|
||||
5 ffn_down 381550.03 0.0094 210713.1406 29.81 2109.55 100.00% 12800 1.6182 11.86% 0.03% 0.0011
|
||||
38 ffn_down 242713.44 0.2437 7073.1206 18.96 84.75 100.00% 12800 12.6848 92.97% 0.70% 0.0476
|
||||
3 ffn_down 188951.92 0.0109 147793.5938 14.76 1332.36 100.00% 12800 1.2131 8.89% 0.03% 0.0026
|
||||
37 ffn_down 142317.03 0.0870 5205.8745 11.12 52.73 100.00% 12800 12.6322 92.59% 0.67% 0.0329
|
||||
36 ffn_down 83945.62 0.0217 2698.7646 6.56 37.12 100.00% 12800 11.9912 87.89% 1.06% 0.0443
|
||||
35 ffn_down 42474.45 0.0859 952.5922 3.32 13.19 100.00% 12800 12.1797 89.27% 2.48% 0.0830
|
||||
34 ffn_down 26102.06 0.0438 229.3722 2.04 5.52 100.00% 12800 12.2821 90.02% 4.28% 0.1049
|
||||
33 ffn_down 12264.68 0.0564 152.2174 0.96 2.91 100.00% 12800 12.2756 89.97% 3.31% 0.0943
|
||||
0 ffn_down 6042.44 0.0000 2911.0620 0.47 25.98 100.00% 12800 5.6061 41.09% 0.12% 0.0000
|
||||
32 ffn_down 5920.96 0.0421 99.5476 0.46 1.42 100.00% 12800 12.4740 91.43% 3.20% 0.0558
|
||||
31 ffn_down 3874.26 0.0193 185.5614 0.30 1.72 100.00% 12800 12.4092 90.95% 1.28% 0.0903
|
||||
30 ffn_down 3486.52 0.0333 71.5827 0.27 0.87 100.00% 12800 12.5027 91.64% 2.80% 0.0724
|
||||
29 ffn_down 2260.60 0.0105 59.7711 0.18 0.70 100.00% 12800 12.4988 91.61% 1.99% 0.0480
|
||||
2 ffn_down 1941.90 0.0007 217.4457 0.15 1.95 100.00% 12800 11.9799 87.80% 0.16% 0.0440
|
||||
28 ffn_down 1810.21 0.0343 70.6592 0.14 0.70 100.00% 12800 12.6709 92.87% 0.97% 0.0639
|
||||
1 ffn_down 1559.32 0.0002 17.3429 0.12 0.21 100.00% 12800 13.0362 95.55% 5.21% 0.0058
|
||||
27 ffn_down 1505.68 0.0334 16.9273 0.12 0.33 100.00% 12800 12.8432 94.13% 1.88% 0.0300
|
||||
23 ffn_down 1423.83 0.0149 23.6004 0.11 0.42 100.00% 12800 12.5741 92.16% 1.63% 0.0554
|
||||
24 ffn_down 1422.49 0.0148 57.0210 0.11 0.56 100.00% 12800 12.6908 93.01% 0.84% 0.0463
|
||||
26 ffn_down 1401.04 0.0277 123.4429 0.11 1.10 100.00% 12800 12.4077 90.94% 0.29% 0.0151
|
||||
25 ffn_down 1375.32 0.0282 80.1291 0.11 0.74 100.00% 12800 12.6175 92.48% 0.48% 0.0268
|
||||
22 ffn_down 1156.34 0.0120 34.4371 0.09 0.38 100.00% 12800 12.6976 93.06% 1.30% 0.0673
|
||||
21 ffn_down 1138.10 0.0147 24.3792 0.09 0.28 100.00% 12800 12.7839 93.70% 2.28% 0.0947
|
||||
20 ffn_down 1136.30 0.0127 21.3931 0.09 0.26 100.00% 12800 12.8313 94.04% 2.33% 0.1177
|
||||
18 ffn_down 1119.47 0.0176 4.8772 0.09 0.14 100.00% 12800 12.9995 95.28% 5.52% 0.2902
|
||||
19 ffn_down 1116.88 0.0151 20.6434 0.09 0.23 100.00% 12800 12.9391 94.83% 2.98% 0.1868
|
||||
16 ffn_down 1105.64 0.0134 17.2072 0.09 0.19 100.00% 12800 13.0591 95.71% 3.31% 0.0832
|
||||
17 ffn_down 1041.27 0.0162 9.5109 0.08 0.13 100.00% 12800 13.1452 96.34% 4.94% 0.2264
|
||||
15 ffn_down 1024.12 0.0230 43.3770 0.08 0.40 100.00% 12800 12.7413 93.39% 1.24% 0.0284
|
||||
6 ffn_down 1022.13 0.0078 352.7156 0.08 3.12 100.00% 12800 9.2787 68.01% 0.05% 0.0001
|
||||
14 ffn_down 951.70 0.0253 49.8595 0.07 0.50 100.00% 12800 12.6100 92.42% 0.50% 0.0720
|
||||
11 ffn_down 926.64 0.0218 12.4872 0.07 0.13 100.00% 12800 13.2980 97.47% 1.80% 0.2713
|
||||
4 ffn_down 923.82 0.0080 22.2460 0.07 0.37 100.00% 12800 12.2201 89.57% 1.02% 0.0012
|
||||
12 ffn_down 888.91 0.0240 7.9189 0.07 0.12 100.00% 12800 13.2283 96.95% 2.80% 0.2412
|
||||
10 ffn_down 857.79 0.0211 5.1418 0.07 0.10 100.00% 12800 13.1941 96.70% 3.75% 0.1709
|
||||
13 ffn_down 845.87 0.0263 8.8175 0.07 0.11 100.00% 12800 13.2368 97.02% 2.59% 0.2583
|
||||
9 ffn_down 829.72 0.0127 11.8182 0.06 0.21 100.00% 12800 12.6312 92.58% 1.71% 0.1047
|
||||
7 ffn_down 761.20 0.0088 17.7730 0.06 0.18 100.00% 12800 13.0089 95.35% 1.20% 0.0092
|
||||
8 ffn_down 714.58 0.0115 9.3180 0.06 0.15 100.00% 12800 12.9415 94.85% 1.95% 0.1090
|
||||
39 ffn_gate 73855.20 0.0000 27529.4883 18.03 432.89 99.98% 4096 7.6055 63.38% 0.27% 0.9793
|
||||
38 ffn_gate 29146.70 0.0000 3154.5896 7.12 50.92 99.98% 4096 10.6959 89.13% 0.63% 0.9972
|
||||
37 ffn_gate 23469.74 0.0000 1999.8401 5.73 32.33 100.00% 4096 11.0207 91.84% 0.66% 0.9944
|
||||
36 ffn_gate 18879.52 0.0000 1587.3755 4.61 26.11 99.98% 4096 10.9542 91.28% 0.68% 0.9535
|
||||
35 ffn_gate 12609.52 0.0000 691.8613 3.08 13.18 99.95% 4096 10.9728 91.44% 1.10% 0.7708
|
||||
34 ffn_gate 9369.36 0.0000 369.9386 2.29 8.82 99.98% 4096 10.8088 90.07% 1.39% 0.9703
|
||||
33 ffn_gate 6623.49 0.0000 206.4922 1.62 5.04 99.98% 4096 11.0320 91.93% 1.56% 0.9570
|
||||
32 ffn_gate 4946.19 0.2047 142.2780 1.21 3.44 100.00% 4096 11.1646 93.04% 1.49% 0.8361
|
||||
0 ffn_gate 4647.99 0.0000 530.0613 1.13 10.15 68.41% 4096 8.6122 71.77% 1.15% 0.0000
|
||||
31 ffn_gate 4116.51 0.0000 180.2167 1.01 3.74 99.98% 4096 11.0407 92.01% 1.32% 0.5779
|
||||
1 ffn_gate 3766.61 0.0000 642.9065 0.92 11.02 99.22% 4096 9.7890 81.57% 0.12% 0.0923
|
||||
30 ffn_gate 3736.23 0.0000 93.6410 0.91 2.60 100.00% 4096 11.2482 93.73% 1.29% 0.9941
|
||||
29 ffn_gate 3591.15 0.0000 88.1915 0.88 2.41 99.98% 4096 11.3407 94.51% 1.10% 0.9684
|
||||
25 ffn_gate 3420.11 0.0000 73.2920 0.83 2.34 99.98% 4096 11.3321 94.43% 0.85% 0.9818
|
||||
12 ffn_gate 3417.91 0.0000 98.0158 0.83 1.89 99.95% 4096 11.6038 96.70% 0.49% 0.9870
|
||||
26 ffn_gate 3406.55 0.0000 75.7852 0.83 2.13 99.98% 4096 11.4214 95.18% 0.88% 0.9927
|
||||
13 ffn_gate 3312.10 0.0000 90.1448 0.81 1.61 99.98% 4096 11.6814 97.34% 0.51% 0.9759
|
||||
11 ffn_gate 3301.55 0.0000 105.7369 0.81 2.04 99.95% 4096 11.5318 96.10% 0.44% 0.9904
|
||||
28 ffn_gate 3275.83 0.0000 68.6310 0.80 1.93 99.98% 4096 11.4178 95.15% 1.22% 0.9578
|
||||
27 ffn_gate 3236.11 0.0000 72.2997 0.79 1.87 99.98% 4096 11.4812 95.68% 1.03% 0.9950
|
||||
14 ffn_gate 3080.71 0.0000 65.3723 0.75 1.33 99.98% 4096 11.6985 97.49% 0.71% 0.9506
|
||||
16 ffn_gate 3016.78 0.0000 55.2134 0.74 1.58 99.98% 4096 11.5581 96.32% 0.76% 0.9874
|
||||
24 ffn_gate 3010.61 0.0000 73.8905 0.74 2.21 100.00% 4096 11.2557 93.80% 0.93% 0.9690
|
||||
22 ffn_gate 2879.76 0.0001 77.3697 0.70 2.02 100.00% 4096 11.3401 94.50% 0.81% 0.9957
|
||||
17 ffn_gate 2870.70 0.0000 51.7692 0.70 1.48 99.98% 4096 11.5662 96.39% 0.73% 0.9959
|
||||
23 ffn_gate 2843.65 0.0001 89.6189 0.69 2.21 100.00% 4096 11.2398 93.66% 0.95% 0.9937
|
||||
10 ffn_gate 2833.02 0.0000 85.4598 0.69 1.74 99.90% 4096 11.5267 96.06% 0.46% 0.8381
|
||||
19 ffn_gate 2776.20 0.0000 70.3380 0.68 1.86 100.00% 4096 11.3917 94.93% 0.71% 0.9944
|
||||
15 ffn_gate 2760.72 0.0000 57.4514 0.67 1.45 99.95% 4096 11.5659 96.38% 0.83% 0.9038
|
||||
18 ffn_gate 2754.00 0.0001 61.1560 0.67 1.55 100.00% 4096 11.5243 96.04% 0.76% 0.9902
|
||||
21 ffn_gate 2737.92 0.0000 70.9311 0.67 1.86 99.98% 4096 11.3741 94.78% 0.81% 0.9902
|
||||
20 ffn_gate 2699.39 0.0000 65.5763 0.66 1.79 99.98% 4096 11.3889 94.91% 0.83% 0.9932
|
||||
8 ffn_gate 2469.90 0.0000 103.2819 0.60 1.84 99.93% 4096 11.4803 95.67% 0.56% 0.9061
|
||||
2 ffn_gate 2319.60 0.0000 630.8475 0.57 10.17 99.80% 4096 8.7421 72.85% 0.12% 0.0200
|
||||
9 ffn_gate 2312.98 0.0000 70.3176 0.56 1.47 99.93% 4096 11.4845 95.70% 0.76% 0.9660
|
||||
7 ffn_gate 2253.67 0.0000 58.9012 0.55 1.44 99.93% 4096 11.5263 96.05% 0.54% 0.9532
|
||||
5 ffn_gate 2213.64 0.0000 330.8649 0.54 5.46 99.93% 4096 10.0780 83.98% 0.24% 0.2463
|
||||
6 ffn_gate 1791.24 0.0000 42.9261 0.44 1.12 99.90% 4096 11.4775 95.65% 0.66% 0.2691
|
||||
4 ffn_gate 1789.65 0.0000 112.6252 0.44 2.11 99.93% 4096 11.0733 92.28% 0.29% 0.9765
|
||||
3 ffn_gate 1716.85 0.0000 175.6357 0.42 3.39 99.90% 4096 10.3538 86.28% 0.22% 0.8606
|
||||
39 ffn_up 73855.20 0.0000 27529.4883 18.03 432.89 99.98% 4096 7.6055 63.38% 0.27% 0.9793
|
||||
38 ffn_up 29146.70 0.0000 3154.5896 7.12 50.92 99.98% 4096 10.6959 89.13% 0.63% 0.9972
|
||||
37 ffn_up 23469.74 0.0000 1999.8401 5.73 32.33 100.00% 4096 11.0207 91.84% 0.66% 0.9944
|
||||
36 ffn_up 18879.52 0.0000 1587.3755 4.61 26.11 99.98% 4096 10.9542 91.28% 0.68% 0.9535
|
||||
35 ffn_up 12609.52 0.0000 691.8613 3.08 13.18 99.95% 4096 10.9728 91.44% 1.10% 0.7708
|
||||
34 ffn_up 9369.36 0.0000 369.9386 2.29 8.82 99.98% 4096 10.8088 90.07% 1.39% 0.9703
|
||||
33 ffn_up 6623.49 0.0000 206.4922 1.62 5.04 99.98% 4096 11.0320 91.93% 1.56% 0.9570
|
||||
32 ffn_up 4946.19 0.2047 142.2780 1.21 3.44 100.00% 4096 11.1646 93.04% 1.49% 0.8361
|
||||
0 ffn_up 4647.99 0.0000 530.0613 1.13 10.15 68.41% 4096 8.6122 71.77% 1.15% 0.0000
|
||||
31 ffn_up 4116.51 0.0000 180.2167 1.01 3.74 99.98% 4096 11.0407 92.01% 1.32% 0.5779
|
||||
1 ffn_up 3766.61 0.0000 642.9065 0.92 11.02 99.22% 4096 9.7890 81.57% 0.12% 0.0923
|
||||
30 ffn_up 3736.23 0.0000 93.6410 0.91 2.60 100.00% 4096 11.2482 93.73% 1.29% 0.9941
|
||||
29 ffn_up 3591.15 0.0000 88.1915 0.88 2.41 99.98% 4096 11.3407 94.51% 1.10% 0.9684
|
||||
25 ffn_up 3420.11 0.0000 73.2920 0.83 2.34 99.98% 4096 11.3321 94.43% 0.85% 0.9818
|
||||
12 ffn_up 3417.91 0.0000 98.0158 0.83 1.89 99.95% 4096 11.6038 96.70% 0.49% 0.9870
|
||||
26 ffn_up 3406.55 0.0000 75.7852 0.83 2.13 99.98% 4096 11.4214 95.18% 0.88% 0.9927
|
||||
13 ffn_up 3312.10 0.0000 90.1448 0.81 1.61 99.98% 4096 11.6814 97.34% 0.51% 0.9759
|
||||
11 ffn_up 3301.55 0.0000 105.7369 0.81 2.04 99.95% 4096 11.5318 96.10% 0.44% 0.9904
|
||||
28 ffn_up 3275.83 0.0000 68.6310 0.80 1.93 99.98% 4096 11.4178 95.15% 1.22% 0.9578
|
||||
27 ffn_up 3236.11 0.0000 72.2997 0.79 1.87 99.98% 4096 11.4812 95.68% 1.03% 0.9950
|
||||
14 ffn_up 3080.71 0.0000 65.3723 0.75 1.33 99.98% 4096 11.6985 97.49% 0.71% 0.9506
|
||||
16 ffn_up 3016.78 0.0000 55.2134 0.74 1.58 99.98% 4096 11.5581 96.32% 0.76% 0.9874
|
||||
24 ffn_up 3010.61 0.0000 73.8905 0.74 2.21 100.00% 4096 11.2557 93.80% 0.93% 0.9690
|
||||
22 ffn_up 2879.76 0.0001 77.3697 0.70 2.02 100.00% 4096 11.3401 94.50% 0.81% 0.9957
|
||||
17 ffn_up 2870.70 0.0000 51.7692 0.70 1.48 99.98% 4096 11.5662 96.39% 0.73% 0.9959
|
||||
23 ffn_up 2843.65 0.0001 89.6189 0.69 2.21 100.00% 4096 11.2398 93.66% 0.95% 0.9937
|
||||
10 ffn_up 2833.02 0.0000 85.4598 0.69 1.74 99.90% 4096 11.5267 96.06% 0.46% 0.8381
|
||||
19 ffn_up 2776.20 0.0000 70.3380 0.68 1.86 100.00% 4096 11.3917 94.93% 0.71% 0.9944
|
||||
15 ffn_up 2760.72 0.0000 57.4514 0.67 1.45 99.95% 4096 11.5659 96.38% 0.83% 0.9038
|
||||
18 ffn_up 2754.00 0.0001 61.1560 0.67 1.55 100.00% 4096 11.5243 96.04% 0.76% 0.9902
|
||||
21 ffn_up 2737.92 0.0000 70.9311 0.67 1.86 99.98% 4096 11.3741 94.78% 0.81% 0.9902
|
||||
20 ffn_up 2699.39 0.0000 65.5763 0.66 1.79 99.98% 4096 11.3889 94.91% 0.83% 0.9932
|
||||
8 ffn_up 2469.90 0.0000 103.2819 0.60 1.84 99.93% 4096 11.4803 95.67% 0.56% 0.9061
|
||||
2 ffn_up 2319.60 0.0000 630.8475 0.57 10.17 99.80% 4096 8.7421 72.85% 0.12% 0.0200
|
||||
9 ffn_up 2312.98 0.0000 70.3176 0.56 1.47 99.93% 4096 11.4845 95.70% 0.76% 0.9660
|
||||
7 ffn_up 2253.67 0.0000 58.9012 0.55 1.44 99.93% 4096 11.5263 96.05% 0.54% 0.9532
|
||||
5 ffn_up 2213.64 0.0000 330.8649 0.54 5.46 99.93% 4096 10.0780 83.98% 0.24% 0.2463
|
||||
6 ffn_up 1791.24 0.0000 42.9261 0.44 1.12 99.90% 4096 11.4775 95.65% 0.66% 0.2691
|
||||
4 ffn_up 1789.65 0.0000 112.6252 0.44 2.11 99.93% 4096 11.0733 92.28% 0.29% 0.9765
|
||||
3 ffn_up 1716.85 0.0000 175.6357 0.42 3.39 99.90% 4096 10.3538 86.28% 0.22% 0.8606
|
||||
|
||||
Computing weighted average statistics per layer (40 layers)
|
||||
|
||||
Layer μΣ(Act²) μZD μCosSim
|
||||
================================================
|
||||
0 4139.83 0.7411% 0.0000
|
||||
1 6791.54 2.9832% 0.1389
|
||||
2 7785.62 1.9852% 0.1614
|
||||
3 70900.05 1.2575% 0.5370
|
||||
4 15985.68 1.4608% 0.4854
|
||||
5 146183.94 0.8107% 0.4142
|
||||
6 18205.81 1.3806% 0.4006
|
||||
7 23167.60 1.2842% 0.5735
|
||||
8 17900.25 1.4849% 0.5729
|
||||
9 17128.10 1.5571% 0.6014
|
||||
10 15332.18 2.2715% 0.5906
|
||||
11 15983.94 1.9451% 0.6469
|
||||
12 17056.90 2.1752% 0.6652
|
||||
13 20767.65 2.4856% 0.6752
|
||||
14 19054.13 1.1933% 0.5841
|
||||
15 19172.65 1.4341% 0.5356
|
||||
16 26136.96 1.8568% 0.5790
|
||||
17 19724.82 2.5230% 0.6361
|
||||
18 22839.35 2.4882% 0.6723
|
||||
19 18405.42 1.6588% 0.6325
|
||||
20 23508.35 1.5224% 0.6168
|
||||
21 23910.19 1.5866% 0.5942
|
||||
22 24533.16 1.1531% 0.5889
|
||||
23 25862.99 1.2548% 0.5850
|
||||
24 19961.15 1.3993% 0.5695
|
||||
25 23721.45 0.7063% 0.5718
|
||||
26 22758.83 0.8053% 0.5699
|
||||
27 26575.09 1.3538% 0.5727
|
||||
28 28424.57 1.0889% 0.5796
|
||||
29 30363.12 1.5732% 0.5756
|
||||
30 38224.65 1.7311% 0.5899
|
||||
31 31137.59 1.3886% 0.5094
|
||||
32 39814.40 1.8755% 0.5482
|
||||
33 41853.15 1.9772% 0.5874
|
||||
34 52107.42 2.3491% 0.5840
|
||||
35 58491.29 1.8193% 0.5410
|
||||
36 77988.16 1.1558% 0.5635
|
||||
37 103052.64 1.1210% 0.5778
|
||||
38 145584.28 1.5143% 0.5889
|
||||
39 640944.31 1.3538% 0.5533
|
||||
Reference in New Issue
Block a user