From a947aa221e2b424af6d47d1e2f8e108329ca454e Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sun, 14 Jun 2026 00:38:20 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: eaddario/granite-4.1-8b-GGUF Source: Original Platform --- .gitattributes | 38 + .gitignore | 584 +++++++++ Granite_4.1_8b-6f6fd2708e8b1ded.bpw_state | 3 + README.md | 238 ++++ granite-4.1-8b-F16.gguf | 3 + granite-4.1-8b-Q1_L.gguf | 3 + granite-4.1-8b-Q2_K.gguf | 3 + granite-4.1-8b-Q3_K.gguf | 3 + granite-4.1-8b-Q4_K.gguf | 3 + granite-4.1-8b-Q5_K.gguf | 3 + granite-4.1-8b-Q6_K.gguf | 3 + granite-4.1-8b-Q7_K.gguf | 3 + granite-4.1-8b-Q8_0.gguf | 3 + imatrix/imatrix-granite-4.1-8b-medium.gguf | 3 + logits/granite-4.1-8b.logits | 3 + scores/granite-4.1-8b-Q1_L.md | 1247 ++++++++++++++++++++ scores/granite-4.1-8b-Q2_K.md | 1247 ++++++++++++++++++++ scores/granite-4.1-8b-Q3_K.md | 1247 ++++++++++++++++++++ scores/granite-4.1-8b-Q4_K.md | 1247 ++++++++++++++++++++ scores/granite-4.1-8b-Q5_K.md | 1247 ++++++++++++++++++++ scores/granite-4.1-8b-Q6_K.md | 1247 ++++++++++++++++++++ scores/granite-4.1-8b-Q7_K.md | 1247 ++++++++++++++++++++ scores/granite-4.1-8b-Q8_0.md | 1247 ++++++++++++++++++++ scores/granite-4.1-8b-q1_l.arc | 18 + scores/granite-4.1-8b-q1_l.gpqa | 18 + scores/granite-4.1-8b-q1_l.hsw | 17 + scores/granite-4.1-8b-q1_l.mmlu | 18 + scores/granite-4.1-8b-q1_l.ppx | 52 + scores/granite-4.1-8b-q1_l.tqa | 18 + scores/granite-4.1-8b-q1_l.wng | 16 + scores/granite-4.1-8b-q2_k.arc | 21 + scores/granite-4.1-8b-q2_k.gpqa | 21 + scores/granite-4.1-8b-q2_k.hsw | 20 + scores/granite-4.1-8b-q2_k.mmlu | 21 + scores/granite-4.1-8b-q2_k.ppx | 55 + scores/granite-4.1-8b-q2_k.tqa | 21 + scores/granite-4.1-8b-q2_k.wng | 19 + scores/granite-4.1-8b-q3_k.arc | 20 + 
scores/granite-4.1-8b-q3_k.gpqa | 20 + scores/granite-4.1-8b-q3_k.hsw | 19 + scores/granite-4.1-8b-q3_k.mmlu | 20 + scores/granite-4.1-8b-q3_k.ppx | 54 + scores/granite-4.1-8b-q3_k.tqa | 20 + scores/granite-4.1-8b-q3_k.wng | 18 + scores/granite-4.1-8b-q4_k.arc | 17 + scores/granite-4.1-8b-q4_k.gpqa | 17 + scores/granite-4.1-8b-q4_k.hsw | 16 + scores/granite-4.1-8b-q4_k.mmlu | 17 + scores/granite-4.1-8b-q4_k.ppx | 51 + scores/granite-4.1-8b-q4_k.tqa | 17 + scores/granite-4.1-8b-q4_k.wng | 15 + scores/granite-4.1-8b-q5_k.arc | 17 + scores/granite-4.1-8b-q5_k.gpqa | 17 + scores/granite-4.1-8b-q5_k.hsw | 16 + scores/granite-4.1-8b-q5_k.mmlu | 17 + scores/granite-4.1-8b-q5_k.ppx | 51 + scores/granite-4.1-8b-q5_k.tqa | 17 + scores/granite-4.1-8b-q5_k.wng | 15 + scores/granite-4.1-8b-q6_k.arc | 16 + scores/granite-4.1-8b-q6_k.gpqa | 16 + scores/granite-4.1-8b-q6_k.hsw | 15 + scores/granite-4.1-8b-q6_k.mmlu | 16 + scores/granite-4.1-8b-q6_k.ppx | 50 + scores/granite-4.1-8b-q6_k.tqa | 16 + scores/granite-4.1-8b-q6_k.wng | 14 + scores/granite-4.1-8b-q7_k.arc | 15 + scores/granite-4.1-8b-q7_k.gpqa | 15 + scores/granite-4.1-8b-q7_k.hsw | 14 + scores/granite-4.1-8b-q7_k.mmlu | 15 + scores/granite-4.1-8b-q7_k.ppx | 49 + scores/granite-4.1-8b-q7_k.tqa | 15 + scores/granite-4.1-8b-q7_k.wng | 13 + scores/granite-4.1-8b-q8_0.arc | 16 + scores/granite-4.1-8b-q8_0.gpqa | 16 + scores/granite-4.1-8b-q8_0.hsw | 15 + scores/granite-4.1-8b-q8_0.mmlu | 16 + scores/granite-4.1-8b-q8_0.ppx | 50 + scores/granite-4.1-8b-q8_0.tqa | 16 + scores/granite-4.1-8b-q8_0.wng | 14 + scores/granite-4.1-8b.itx | 332 ++++++ 80 files changed, 12432 insertions(+) create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 Granite_4.1_8b-6f6fd2708e8b1ded.bpw_state create mode 100644 README.md create mode 100644 granite-4.1-8b-F16.gguf create mode 100644 granite-4.1-8b-Q1_L.gguf create mode 100644 granite-4.1-8b-Q2_K.gguf create mode 100644 granite-4.1-8b-Q3_K.gguf create mode 100644 
granite-4.1-8b-Q4_K.gguf create mode 100644 granite-4.1-8b-Q5_K.gguf create mode 100644 granite-4.1-8b-Q6_K.gguf create mode 100644 granite-4.1-8b-Q7_K.gguf create mode 100644 granite-4.1-8b-Q8_0.gguf create mode 100644 imatrix/imatrix-granite-4.1-8b-medium.gguf create mode 100644 logits/granite-4.1-8b.logits create mode 100644 scores/granite-4.1-8b-Q1_L.md create mode 100644 scores/granite-4.1-8b-Q2_K.md create mode 100644 scores/granite-4.1-8b-Q3_K.md create mode 100644 scores/granite-4.1-8b-Q4_K.md create mode 100644 scores/granite-4.1-8b-Q5_K.md create mode 100644 scores/granite-4.1-8b-Q6_K.md create mode 100644 scores/granite-4.1-8b-Q7_K.md create mode 100644 scores/granite-4.1-8b-Q8_0.md create mode 100644 scores/granite-4.1-8b-q1_l.arc create mode 100644 scores/granite-4.1-8b-q1_l.gpqa create mode 100644 scores/granite-4.1-8b-q1_l.hsw create mode 100644 scores/granite-4.1-8b-q1_l.mmlu create mode 100644 scores/granite-4.1-8b-q1_l.ppx create mode 100644 scores/granite-4.1-8b-q1_l.tqa create mode 100644 scores/granite-4.1-8b-q1_l.wng create mode 100644 scores/granite-4.1-8b-q2_k.arc create mode 100644 scores/granite-4.1-8b-q2_k.gpqa create mode 100644 scores/granite-4.1-8b-q2_k.hsw create mode 100644 scores/granite-4.1-8b-q2_k.mmlu create mode 100644 scores/granite-4.1-8b-q2_k.ppx create mode 100644 scores/granite-4.1-8b-q2_k.tqa create mode 100644 scores/granite-4.1-8b-q2_k.wng create mode 100644 scores/granite-4.1-8b-q3_k.arc create mode 100644 scores/granite-4.1-8b-q3_k.gpqa create mode 100644 scores/granite-4.1-8b-q3_k.hsw create mode 100644 scores/granite-4.1-8b-q3_k.mmlu create mode 100644 scores/granite-4.1-8b-q3_k.ppx create mode 100644 scores/granite-4.1-8b-q3_k.tqa create mode 100644 scores/granite-4.1-8b-q3_k.wng create mode 100644 scores/granite-4.1-8b-q4_k.arc create mode 100644 scores/granite-4.1-8b-q4_k.gpqa create mode 100644 scores/granite-4.1-8b-q4_k.hsw create mode 100644 scores/granite-4.1-8b-q4_k.mmlu create mode 100644 
scores/granite-4.1-8b-q4_k.ppx create mode 100644 scores/granite-4.1-8b-q4_k.tqa create mode 100644 scores/granite-4.1-8b-q4_k.wng create mode 100644 scores/granite-4.1-8b-q5_k.arc create mode 100644 scores/granite-4.1-8b-q5_k.gpqa create mode 100644 scores/granite-4.1-8b-q5_k.hsw create mode 100644 scores/granite-4.1-8b-q5_k.mmlu create mode 100644 scores/granite-4.1-8b-q5_k.ppx create mode 100644 scores/granite-4.1-8b-q5_k.tqa create mode 100644 scores/granite-4.1-8b-q5_k.wng create mode 100644 scores/granite-4.1-8b-q6_k.arc create mode 100644 scores/granite-4.1-8b-q6_k.gpqa create mode 100644 scores/granite-4.1-8b-q6_k.hsw create mode 100644 scores/granite-4.1-8b-q6_k.mmlu create mode 100644 scores/granite-4.1-8b-q6_k.ppx create mode 100644 scores/granite-4.1-8b-q6_k.tqa create mode 100644 scores/granite-4.1-8b-q6_k.wng create mode 100644 scores/granite-4.1-8b-q7_k.arc create mode 100644 scores/granite-4.1-8b-q7_k.gpqa create mode 100644 scores/granite-4.1-8b-q7_k.hsw create mode 100644 scores/granite-4.1-8b-q7_k.mmlu create mode 100644 scores/granite-4.1-8b-q7_k.ppx create mode 100644 scores/granite-4.1-8b-q7_k.tqa create mode 100644 scores/granite-4.1-8b-q7_k.wng create mode 100644 scores/granite-4.1-8b-q8_0.arc create mode 100644 scores/granite-4.1-8b-q8_0.gpqa create mode 100644 scores/granite-4.1-8b-q8_0.hsw create mode 100644 scores/granite-4.1-8b-q8_0.mmlu create mode 100644 scores/granite-4.1-8b-q8_0.ppx create mode 100644 scores/granite-4.1-8b-q8_0.tqa create mode 100644 scores/granite-4.1-8b-q8_0.wng create mode 100644 scores/granite-4.1-8b.itx diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6c36a56 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,38 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz 
filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +*.gguf filter=lfs diff=lfs merge=lfs -text +*.logits filter=lfs diff=lfs merge=lfs -text +*.dat filter=lfs diff=lfs merge=lfs -text +*.bpw_state filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..86d94a1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,584 @@ +# https://github.com/github/gitignore + +# ------------- +# General Linux +# ------------- +.directory +.fuse_hidden* +.nfs* +.Trash-* +*~ + +# ----------- +# General OSX +# ----------- +._* +.apdisk +.AppleDB +.AppleDesktop +.AppleDouble +.com.apple.timemachine.donotpresent +.DocumentRevisions-V100 +.DS_Store +.fseventsd +.LSOverride +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +Network Trash Folder +Temporary Items + +# 
--------------- +# General Windows +# --------------- +[Dd]esktop.ini +*.cab +*.lnk +*.msi +*.msix +*.msm +*.msp +*.stackdump +$RECYCLE.BIN/ +ehthumbs_vista.db +ehthumbs.db +Thumbs.db +Thumbs.db:encryptable + +# ----------------- +# General JetBrains +# ----------------- +.idea_modules/ +.idea/ +*.iml +*.ipr +*.iws + +# --------------- +# General VS Code +# --------------- +!.vscode/*.code-snippets +!.vscode/extensions.json +!.vscode/launch.json +!.vscode/settings.json +!.vscode/tasks.json +.history/ +.vscode/ +*.vsix + +# --------------------- +# General Visual Studio +# --------------------- +__pycache__/ +_NCrunch_* +_pkginfo.txt +_Pvt_Extensions +_ReSharper*/ +_TeamCity* +_UpgradeReport_Files/ +!?*.[Cc]ache/ +!.vscode/extensions.json +!**/[Pp]ackages/build/ +.*crunch*.local.xml +.builds +.cr/personal +.fake/ +.ionide/ +.localhistory/ +.mfractor/ +.ntvs_analysis.dat +.paket/paket.exe +.sass-cache/ +.vs/ +.vscode/* +.vshistory/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +[Bb]in/ +[Bb]uild[Ll]og.* +[Dd]ebug/ +[Dd]ebugPS/ +[Dd]ebugPublic/ +[Ee]xpress/ +[Ll]og/ +[Ll]ogs/ +[Oo]bj/ +[Rr]elease/ +[Rr]eleasePS/ +[Rr]eleases/ +[Tt]est[Rr]esult*/ +[Ww][Ii][Nn]32/ +*_h.h +*_i.c +*_p.c +*_wpftmp.csproj +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl +*- [Bb]ackup.rdl +*.[Cc]ache +*.[Pp]ublish.xml +*.[Rr]e[Ss]harper +*.appx +*.appxbundle +*.appxupload +*.aps +*.azurePubxml +*.bim_*.settings +*.bim.layout +*.binlog +*.btm.cs +*.btp.cs +*.build.csdef +*.cachefile +*.code-workspace +*.coverage +*.coveragexml +*.dbmdl +*.dbproj.schemaview +*.dotCover +*.DotSettings.user +*.dsp +*.dsw +*.e2e +*.GhostDoc.xml +*.gpState +*.ilk +*.iobj +*.ipdb +*.jfm +*.jmconfig +*.ldf +*.mdf +*.meta +*.mm.* +*.ncb +*.ndf +*.nuget.props +*.nuget.targets +*.nupkg +*.nvuser +*.obj +*.odx.cs +*.opendb +*.opensdf +*.opt +*.pch +*.pdb +*.pfx +*.pgc +*.pgd +*.pidb +*.plg +*.psess +*.publishproj +*.publishsettings +*.pubxml +*.pyc +*.rdl.data +*.rptproj.bak +*.rptproj.rsuser +*.rsp +*.rsuser +*.sap +*.sbr 
+*.scc +*.sdf +*.sln.docstates +*.sln.iml +*.snupkg +*.suo +*.svclog +*.tlb +*.tlh +*.tli +*.tlog +*.tmp +*.tmp_proj +*.tss +*.user +*.userosscache +*.userprefs +*.vbp +*.vbw +*.VC.db +*.VC.VC.opendb +*.VisualState.xml +*.vsp +*.vspscc +*.vspx +*.vssscc +*.xsd.cs +**/[Pp]ackages/* +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.HTMLClient/GeneratedArtifacts +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +~$* +$tf/ +AppPackages/ +artifacts/ +ASALocalRun/ +AutoTest.Net/ +Backup*/ +BenchmarkDotNet.Artifacts/ +bld/ +BundleArtifacts/ +ClientBin/ +coverage*.info +coverage*.json +coverage*.xml +csx/ +dlldata.c +DocProject/buildhelp/ +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/*.HxC +DocProject/Help/*.HxT +DocProject/Help/html +DocProject/Help/Html2 +ecf/ +FakesAssemblies/ +FodyWeavers.xsd +Generated_Code/ +healthchecksdb +ipch/ +MigrationBackup/ +mono_crash.* +nCrunchTemp_* +node_modules/ +nunit-*.xml +OpenCover/ +orleans.codegen.cs +Package.StoreAssociation.xml +paket-files/ +project.fragment.lock.json +project.lock.json +publish/ +PublishScripts/ +rcf/ +ScaffoldingReadMe.txt +ServiceFabricBackup/ +StyleCopReport.xml +TestResult.xml +UpgradeLog*.htm +UpgradeLog*.XML +x64/ +x86/ + +# ---------------------- +# General Archived Files +# ---------------------- +*.7z +*.bz2 +*.bzip +*.bzip2 +*.deb +*.dmg +*.egg +*.gem +*.gz +*.gzip +*.iso +*.jar +*.lzma +*.rar +*.rpm +*.tar +*.tgz +*.txz +*.tzst +*.xar +*.xpi +*.xz +*.zip +*.zst + +# ----- +# C/C++ +# ----- +.tmp_versions/ +*.a +*.app +*.cmd +*.d +*.dll +*.dSYM/ +*.dylib +*.elf +*.exe +*.exp +*.gch +*.hex +*.i*86 +*.idb +*.ko +*.la +*.lai +*.lib +*.lo +*.map +*.mod* +*.o +*.out +*.slo +*.so +*.so.* +*.su +*.x86_64 +dkms.conf +Mkfile.old +Module.symvers +modules.order + +# ---- +# CUDA +# ---- +*.cubin +*.fatbin +*.gpu +*.i +*.ii +*.ptx + +# -- +# Go +# -- +.env +*.exe~ +*.test +go.work +go.work.sum + +# ---- +# Java +# ---- +*.class 
+*.ctxt +*.ear +*.hprof +*.nar +*.tar.gz +*.war +hs_err_pid* +replay_pid* + +# ----- +# Julia +# ----- +*.jl.*.cov +*.jl.cov +*.jl.mem +deps/build.log +deps/deps.jl +deps/downloads/ +deps/src/ +deps/usr/ +docs/build/ +docs/site/ +Manifest.toml + +# ------------------------------ +# JavaScript / Node / TypeScript +# ------------------------------ +.cache +.cache/ +.docusaurus +.dynamodb/ +.env.development.local +.env.local +.env.production.local +.env.test.local +.eslintcache +.fusebox/ +.grunt +.lock-wscript +.next +.node_repl_history +.npm +.nuxt +.nyc_output +.parcel-cache +.pnp.* +.pnpm-debug.log* +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ +.serverless/ +.stylelintcache +.temp +.tern-port +.vscode-test +.vuepress/dist +.yarn-integrity +.yarn/build-state.yml +.yarn/cache +.yarn/install-state.gz +.yarn/unplugged +*.lcov +*.pid +*.pid.lock +*.seed +*.tsbuildinfo +bower_components +dist +jspm_packages/ +lerna-debug.log* +lib-cov +logs +npm-debug.log* +out +pids +report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json +web_modules/ +yarn-debug.log* +yarn-error.log* + +# ------ +# Python +# ------ +__pypackages__/ +.coverage +.coverage.* +.dmypy.json +.eggs/ +.hypothesis/ +.installed.cfg +.ipynb_checkpoints +.mypy_cache/ +.nox/ +.pdm-build/ +.pdm-python +.pdm.toml +.pybuilder/ +.pyre/ +.pytest_cache/ +.Python +.python-version +.pytype/ +.ropeproject +.scrapy +.spyderproject +.spyproject +.tox/ +.venv +.webassets-cache +*.cover +*.egg-info/ +*.manifest +*.mo +*.pot +*.py,cover +*.py[cod] +*.sage.py +*.spec +*$py.class +/site +build/ +celerybeat-schedule +celerybeat.pid +cover/ +coverage.xml +cython_debug/ +db.sqlite3 +db.sqlite3-journal +develop-eggs/ +dist/ +dmypy.json +docs/_build/ +downloads/ +eggs/ +env.bak/ +env/ +htmlcov/ +instance/ +ipython_config.py +lib/ +lib64/ +local_settings.py +MANIFEST +nosetests.xml +parts/ +pip-delete-this-directory.txt +pip-log.txt +profile_default/ +sdist/ +share/python-wheels/ +target/ +var/ +venv.bak/ +venv/ +wheels/ + +# 
---- +# Rust +# ---- +**/*.rs.bk +Cargo.lock +debug/ + +# ----- +# Scala +# ----- + +# ----- +# CMake +# ----- +_deps +cmake_install.cmake +CMakeCache.txt +CMakeFiles +CMakeLists.txt.user +CMakeScripts +CMakeUserPresets.json +compile_commands.json +CTestTestfile.cmake +install_manifest.txt +Makefile +Testing + +# ------ +# Gradle +# ------ +!gradle-wrapper.jar +!gradle-wrapper.properties +!src/**/build/ +.classpath +.gradle +.gradletasknamecache +.project +**/build/ +gradle-app.setting + +# ----- +# Maven +# ----- +.mvn/ +buildNumber.properties +dependency-reduced-pom.xml +pom.xml.next +pom.xml.releaseBackup +pom.xml.tag +pom.xml.versionsBackup +release.properties + +# --------- +# Terraform +# --------- +.terraform.tfstate.lock.info +.terraform/ +.terraformrc +*_override.tf +*_override.tf.json +*.tfstate +*.tfstate.* +*.tfvars +*.tfvars.json +crash.*.log +crash.log +override.tf +override.tf.json +terraform.rc \ No newline at end of file diff --git a/Granite_4.1_8b-6f6fd2708e8b1ded.bpw_state b/Granite_4.1_8b-6f6fd2708e8b1ded.bpw_state new file mode 100644 index 0000000..4959bc2 --- /dev/null +++ b/Granite_4.1_8b-6f6fd2708e8b1ded.bpw_state @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ab793fef9b02f0b52d04ae0a0bc046d75406ba58d9291b3439fbc5f1f0519fe +size 99424 diff --git a/README.md b/README.md new file mode 100644 index 0000000..0019046 --- /dev/null +++ b/README.md @@ -0,0 +1,238 @@ +--- +base_model: +- ibm-granite/granite-4.1-8b +datasets: +- eaddario/imatrix-calibration +language: +- en +license: +- apache-2.0 +pipeline_tag: text-generation +tags: +- gguf +- quant +- target_bpw +- experimental +--- + +# Experimental global target bits‑per‑weight quantization of [ibm-granite/granite-4.1-8b](https://huggingface.co/ibm-granite/granite-4.1-8b) +Using **non-standard** (forked) [LLaMA C++][llm] release [b9358][llm-rel] for quantization. 
+ +Original model: [ibm-granite/granite-4.1-8b][mdl] + +From the original model creators: +> [![mof-class3-qualified](https://mot.isitopen.ai/modules/mof/assets/badge_class3_qualified.png)](https://mot.isitopen.ai/model/1160) +> +> # Granite-4.1-8B +> +> **Model Summary:** +> Granite-4.1-8B is a 8B parameter long-context instruct model finetuned from *Granite-4.1-8B-Base* using a combination of open source instruction datasets with permissive license and internally collected synthetic datasets. Granite 4.1 models have gone through an improved post-training pipeline, including supervised finetuning and reinforcement learning alignment, resulting in enhanced tool calling, instruction following, and chat capabilities. +> +> - **Developers:** Granite Team, IBM +> - **HF Collection:** [Granite 4.1 Language Models HF Collection](https://huggingface.co/collections/ibm-granite/granite-41-language-models) +> - **Technical Blog:** [Granite-4.1 Blog](https://huggingface.co/blog/ibm-granite/granite-4-1) +> - **GitHub Repository:** [ibm-granite/granite-4.1-language-models](https://github.com/ibm-granite/granite-4.1-language-models) +> - **Website**: [Granite Docs](https://www.ibm.com/granite/docs/) +> - **Release Date**: April 29th, 2026 +> - **License:** [Apache 2.0](https://www.apache.org/licenses/LICENSE-2.0) +> +> **Supported Languages:** +> English, German, Spanish, French, Japanese, Portuguese, Arabic, Czech, Italian, Korean, Dutch, and Chinese. Users may finetune Granite 4.1 models for languages beyond these languages. +> +> **Intended use:** +> The model is designed to follow general instructions and can serve as the foundation for AI assistants across diverse domains, including business applications, as well as for LLM agents equipped with tool-use capabilities. + +--- + +# ⚠️ PLEASE READ THIS BEFORE USING THESE EXPERIMENTAL VERSIONS! 
⚠️ +An area of personal interest is finding ways to optimize the inference performance of LLMs when deployed in resource-constrained environments like commodity hardware, desktops, laptops, mobiles, edge devices, etc. There are many approaches to accomplish this, including architecture simplification and knowledge distillation, but my focus has been primarily on quantization and pruning. + +The method to produce these experimental versions involves using a custom version of [`llama-imatrix`][imx] to generate an imatrix that includes tensor statistics, and a custom version of [`llama-quantize`][qtz], which computes a per-tensor quantization error, to automatically select the lowest error quantization recipe that achieves a global target bits‑per‑weight (bpw). More details on the implementation and test results are available [here][bpw]. + +There are two pull requests ([#14891][imtx-pr] & [#15550][qtz-pr]) to merge these changes back into the core llama.cpp project. This may or may not ever happen so, until then, the modified versions will be available on [GitHub][gh]. + +For testing and comparison, I use models produced by [Bartowski][btk] (see credits below) and [Unsloth][ust] ([Daniel and Michael Han][ust-ai] do some really interesting stuff!) but when they don't provide versions of the required model, tests and comparisons are against standard quantization obtained by simply running `llama-quantize` with no further optimizations. + +All experimental versions were generated using an appropriate imatrix created from datasets available at [eaddario/imatrix-calibration][ical]. In `llama.cpp`, an imatrix is a calibration file derived from running representative text through the model and collecting activation statistics. It is used to weight quantization error so that error in more “important” directions (as estimated from activations) is penalized more heavily. + +The process to generate these models is roughly as follows: +1.
Convert the original model's [safetensors][sfts] to [GGUF][ggf] F16 +2. Estimate the [Perplexity][ppl] score for the F16 model (baseline) using the [wikitext-2-raw-v1][wki-dat] dataset, and save the [logits][lgt] +3. Generate an [imatrix][imx-dat] from the most appropriate [calibration dataset][ical] +4. Quantize the baseline model targeting a bpw average (e.g. `llama-quantize --target-bpw 4.5678 --state-file --imatrix imatrix.gguf baseline-model-F16.gguf 12`) +5. Calculate Perplexity, KL Divergence, ARC (Easy+Challenge), GPQA-Diamond, HellaSwag, MMLU-Redux, Truthful QA and WinoGrande scores for each quantized model +6. Keep the version with the best 𝜌PPL and μKLD scores +7. Repeat until all desired quants are created + +### Misconceptions about BF16 to F16 Conversion +A common concern when converting BFloat16 ([BF16][bf16]) models to Float16 (F16) is the potential for accuracy loss. Specifically: +- Weight Clipping (Overflow): Clipping, or overflow, is often feared but only occurs if a model's weights exceed the range of ±65,504. This is a relatively rare issue in practice. +- Subnormal Zeroing (Underflow): A more frequent occurrence is underflow, where weights smaller than approximately 5.96x10⁻⁸ are converted to zero. + +Crucially, when the F16 model is subsequently used for quantization, the resulting degradation in metrics like Perplexity ([PPL][ppl]) or Kullback–Leibler Divergence ([KLD][kld]) is minimal. Any variations are typically restricted to the hundredths or thousandths decimal places compared to the BF16 model. + +However, considering that weight clipping presents a more substantial risk to model integrity, every BF16 base model undergoes validation prior to the conversion process. Consequently, no models hosted in this repository exhibit performance degradation due to overflow clipping. + +While BF16 offers precision benefits, performance remains a key factor.
+- Conversion Speed: Tests, such as timing `convert_hf_to_gguf.py`, show a notable performance difference, with conversion to BF16 being 15–30% slower than to F16. +- Inference Speed: A less pronounced but still present difference (3–6%) is observed during inference. Although native BF16 support has been introduced by many chip manufacturers, the slower performance **may** stem from the entire software and hardware stack (firmware, libraries, etc.) not being fully optimized yet. + +The choice to prioritize F16 over BF16 is driven by a focus on maximizing performance in specific deployment environments. My primary objective is not large-scale quantization production, a domain where others like [Bartowski][btk] and [Unsloth][ust] excel, but rather optimizing inference performance for resource-constrained environments. Since BF16 support is not yet widespread in areas like mobile, edge, and embedded devices, using F16 ensures broader compatibility and easier optimization for these use cases. + +# Advantages and disadvantages of the global target bits‑per‑weight quantization process +### Advantages +1. **Target arbitrary size models** + - When specifying `--target-bpw 4.5678` for instance, the algorithm will produce a model (nearly) exactly of that size, which is very useful for maximizing VRAM usage. In a system with 24GB VRAM and a 70B model, standard quants might produce a 16.8GB file (too small, quality left on table) or a 24.1GB file (won't fit). This approach can generate a 23.85GB file to utilize the hardware fully. + +2. **Data-driven mixed precision often can improve quality at fixed size** + - Instead of using hardcoded heuristics (e.g. make `attn_v` Q5_K for a 70B model), that may be sub‑optimal for a given architecture or size, the quantization mix is determined by the actual error sensitivity of the specific model's weights.
This, in practice, often yields a better quality/size trade-off, especially in aggressive quantization scenarios (1.5 to 3.5 bpw), or for unusual architectures. + + - **Please note**: `llama.cpp`’s heuristics have been tuned across many models and are highly optimized; although the target bpw method produces better quality often (>75% based on tests with 130 models from 11 different families), it can also lose in surprising cases. + +3. **Allows better like-for-like comparisons between models and families** + - Standard `llama.cpp` quantization uses hardcoded rules like: *"use Q4_K_M, except bump some tensors up/down, except fall back if incompatible, except keep some tensors unquantized..."* and for that reason, two different models quantized with the same Q4_K_M type can end up with very different bpw (e.g. 4.75 and 4.30). + + - All things being equal, the performance of a model is usually proportional to its overall bpw size; models with a higher bpw tend to perform better than lower bpw models. Since model A has simply been given more bits, it will typically perform better (lower perplexity, better eval scores, etc.) even if the underlying quantization method is identical. That makes comparing the performance not a controlled experiment, because the comparison is between models with different effective compression ratios. + + - `--target-bpw` tries to address that by making the experiment more controlled: each model gets quantized to land on (approximately) the same global byte budget, so that the models' performance differences are more attributable to architecture/training differences, quantization error behaviour at the same compression ratio, optimizer’s allocation decisions, etc. + +### Disadvantages +1. 
**Quantization process is significantly slower than standard** + - This approach can take 5x-10x longer as it quantizes a sample of most tensors into 15 different formats, dequantizes them back to floats, computes error diffs, and selects the best size/error option that fits the global bpw budget. + + - However, the `--state-file` option will save/use the above-mentioned computations so that future quantizations, for the same model, can be generated at normal speed. It also allows the computation process to be interrupted and resumed at a later time. + +2. **The optimization target is only a proxy for the model's performance quality** + - The process minimizes a per-tensor estimated error computed from sampled rows, not actual perplexity or divergence of output distributions (a future version may address this). Since errors interact nonlinearly across layers, there are no guarantees it will select the best possible quantization recipe subject to the bpw size constraint. + +3. **An imatrix with activations data is required for best results** + - Activation data is required to compute the bias factor (i.e. the systematic error projected onto activation directions). If the imatrix file does not contain activation data, the `--target-bpw` option will refuse to run.
+ +--- + +# Models +### Bits per weight, size, perplexity and KL Divergence scores +| Model | BPW | Size (GB) | μPPL | 𝜌PPL | μKLD | Same Top-P | +| ------------------------------------------------- | ------: | --------: | ------------------: | -----: | -----------------: | ------------: | +| [granite-4.1-8b-F16](./granite-4.1-8b-F16.gguf) | 16.0006 | 17.6 | 8.691178 ±0.065443 | 100% | N/A | N/A | +| [granite-4.1-8b-Q1_L](./granite-4.1-8b-Q1_L.gguf) | 1.7500 | 1.93 | 87.318832 ±0.781580 | 57.61% | 2.889523 ±0.005948 | 34.309 ±0.125 | +| [granite-4.1-8b-Q2_K](./granite-4.1-8b-Q2_K.gguf) | 2.5000 | 2.75 | 12.534216 ±0.095606 | 86.12% | 0.644965 ±0.002755 | 67.231 ±0.124 | +| [granite-4.1-8b-Q3_K](./granite-4.1-8b-Q3_K.gguf) | 3.5000 | 3.85 | 9.381594 ±0.070128 | 96.18% | 0.173887 ±0.001079 | 82.732 ±0.100 | +| [granite-4.1-8b-Q4_K](./granite-4.1-8b-Q4_K.gguf) | 4.4999 | 4.95 | 8.867438 ±0.067303 | 98.88% | 0.047917 ±0.000392 | 90.937 ±0.076 | +| [granite-4.1-8b-Q5_K](./granite-4.1-8b-Q5_K.gguf) | 5.4999 | 6.05 | 8.766150 ±0.066421 | 99.48% | 0.018940 ±0.000165 | 94.120 ±0.062 | +| [granite-4.1-8b-Q6_K](./granite-4.1-8b-Q6_K.gguf) | 6.4998 | 7.15 | 8.755199 ±0.066400 | 99.74% | 0.007326 ±0.000066 | 96.165 ±0.051 | +| [granite-4.1-8b-Q7_K](./granite-4.1-8b-Q7_K.gguf) | 7.4998 | 8.25 | 8.751241 ±0.066500 | 99.82% | 0.003568 ±0.000040 | 97.235 ±0.043 | +| [granite-4.1-8b-Q8_0](./granite-4.1-8b-Q8_0.gguf) | 8.4988 | 9.34 | 8.749119 ±0.066517 | 99.85% | 0.002052 ±0.000024 | 97.749 ±0.039 | + +### ARC, GPQA-Diamond, HellaSwag, MMLU-Redux, Truthful QA, and WinoGrande scores +Scores generated using [llama-perplexity][ppl] with 750 tasks per test, and a context size of 1024 tokens.
+ +For the test data used in the generation of these scores, follow the appropriate links: [ARC Challenge, Truthful QA][tst-dat], [GPQA-Diamond][gpqa-dat], [HellaSwag][hsw-tst], [MMLU-Redux][mrdx], [WinoGrande][wng-tst] + +| Model | ARC Challenge | GPQA-Diamond | HellaSwag | MMLU-Redux | Truthful QA | WinoGrande | Avg Score | +| ------------------------------------------------- | --------------: | --------------: | --------: | --------------: | --------------: | --------------: | --------: | +| [granite-4.1-8b-Q1_L](./granite-4.1-8b-Q1_L.gguf) | 36.5333 ±1.7594 | 19.1919 ±2.8058 | 36.00 | 27.2000 ±1.6260 | 28.9333 ±1.6569 | 52.5333 ±1.8246 | 33.40 | +| [granite-4.1-8b-Q2_K](./granite-4.1-8b-Q2_K.gguf) | 60.4000 ±1.7870 | 29.7980 ±3.2586 | 70.00 | 59.2000 ±1.7958 | 33.4667 ±1.7242 | 65.2000 ±1.7405 | 53.01 | +| [granite-4.1-8b-Q3_K](./granite-4.1-8b-Q3_K.gguf) | 62.0000 ±1.7736 | 21.7172 ±2.9377 | 79.33 | 69.2000 ±1.6869 | 39.6000 ±1.7870 | 71.7333 ±1.6453 | 57.26 | +| [granite-4.1-8b-Q4_K](./granite-4.1-8b-Q4_K.gguf) | 66.9333 ±1.7190 | 23.2323 ±3.0089 | 79.73 | 71.4667 ±1.6500 | 38.9333 ±1.7816 | 73.4667 ±1.6132 | 58.96 | +| [granite-4.1-8b-Q5_K](./granite-4.1-8b-Q5_K.gguf) | 66.4000 ±1.7259 | 22.7273 ±2.9858 | 79.87 | 72.1333 ±1.6382 | 38.5333 ±1.7783 | 73.4667 ±1.6132 | 58.86 | +| [granite-4.1-8b-Q6_K](./granite-4.1-8b-Q6_K.gguf) | 67.0667 ±1.7172 | 24.7475 ±3.0746 | 80.13 | 72.6667 ±1.6284 | 38.2667 ±1.7759 | 73.7333 ±1.6080 | 59.44 | +| [granite-4.1-8b-Q7_K](./granite-4.1-8b-Q7_K.gguf) | 66.4000 ±1.7259 | 26.7677 ±3.1544 | 80.27 | 72.1333 ±1.6382 | 38.5333 ±1.7783 | 73.6000 ±1.6106 | 59.62 | +| [granite-4.1-8b-Q8_0](./granite-4.1-8b-Q8_0.gguf) | 66.8000 ±1.7207 | 26.7677 ±3.1544 | 80.53 | 72.4000 ±1.6334 | 38.4000 ±1.7771 | 73.2000 ±1.6184 | 59.68 | + +### Tokens per second benchmarks +Scores generated using [llama-bench][bch]. Standard (`llama-quantize` with no optimization) Q4_K_M quantization included for comparison.
+ +| model | size | params | backend | threads | test | t/s | +| ------------------------------------------------- | -------: | -----: | -------- | ------: | ------------: | ------------: | +| [granite-4.1-8b-Q1_L](./granite-4.1-8b-Q1_L.gguf) | 1.79 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 783.11 ±0.52 | +| [granite-4.1-8b-Q1_L](./granite-4.1-8b-Q1_L.gguf) | 1.79 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 68.68 ±0.17 | +| [granite-4.1-8b-Q1_L](./granite-4.1-8b-Q1_L.gguf) | 1.79 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 108.35 ±1.28 | +| [granite-4.1-8b-Q2_K](./granite-4.1-8b-Q2_K.gguf) | 2.56 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 728.97 ±10.22 | +| [granite-4.1-8b-Q2_K](./granite-4.1-8b-Q2_K.gguf) | 2.56 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 68.76 ±0.21 | +| [granite-4.1-8b-Q2_K](./granite-4.1-8b-Q2_K.gguf) | 2.56 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 108.98 ±0.24 | +| [granite-4.1-8b-Q3_K](./granite-4.1-8b-Q3_K.gguf) | 3.58 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 733.45 ±9.51 | +| [granite-4.1-8b-Q3_K](./granite-4.1-8b-Q3_K.gguf) | 3.58 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 63.63 ±1.20 | +| [granite-4.1-8b-Q3_K](./granite-4.1-8b-Q3_K.gguf) | 3.58 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 94.51 ±1.15 | +| [granite-4.1-8b-Q4_K](./granite-4.1-8b-Q4_K.gguf) | 4.61 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 771.63 ±0.97 | +| [granite-4.1-8b-Q4_K](./granite-4.1-8b-Q4_K.gguf) | 4.61 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 66.33 ±1.24 | +| [granite-4.1-8b-Q4_K](./granite-4.1-8b-Q4_K.gguf) | 4.61 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 105.98 ±4.76 | +| [granite-4.1-8b-Q5_K](./granite-4.1-8b-Q5_K.gguf) | 5.63 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 673.26 ±34.19 | +| [granite-4.1-8b-Q5_K](./granite-4.1-8b-Q5_K.gguf) | 5.63 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 51.29 ±3.09 | +| [granite-4.1-8b-Q5_K](./granite-4.1-8b-Q5_K.gguf) | 5.63 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 83.45 ±2.31 | +| [granite-4.1-8b-Q6_K](./granite-4.1-8b-Q6_K.gguf) | 6.65 GiB 
| 8.79 B | BLAS,MTL | 12 | pp512 | 703.41 ±23.92 | +| [granite-4.1-8b-Q6_K](./granite-4.1-8b-Q6_K.gguf) | 6.65 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 52.12 ±1.38 | +| [granite-4.1-8b-Q6_K](./granite-4.1-8b-Q6_K.gguf) | 6.65 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 87.04 ±0.22 | +| [granite-4.1-8b-Q7_K](./granite-4.1-8b-Q7_K.gguf) | 7.68 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 614.53 ±0.48 | +| [granite-4.1-8b-Q7_K](./granite-4.1-8b-Q7_K.gguf) | 7.68 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 49.47 ±0.59 | +| [granite-4.1-8b-Q7_K](./granite-4.1-8b-Q7_K.gguf) | 7.68 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 83.45 ±0.24 | +| [granite-4.1-8b-Q8_0](./granite-4.1-8b-Q8_0.gguf) | 8.70 GiB | 8.79 B | BLAS,MTL | 12 | pp512 | 800.32 ±0.73 | +| [granite-4.1-8b-Q8_0](./granite-4.1-8b-Q8_0.gguf) | 8.70 GiB | 8.79 B | BLAS,MTL | 12 | tg128 | 46.66 ±0.04 | +| [granite-4.1-8b-Q8_0](./granite-4.1-8b-Q8_0.gguf) | 8.70 GiB | 8.79 B | BLAS,MTL | 12 | pp1024+tg1024 | 77.87 ±0.30 | + +# Metrics used +**[Perplexity][ppx]:** one of the key metrics used in NLP evaluation. It measures the quality of a language model by evaluating how well it predicts the next token given a particular sequence of words. A PPL of **1** indicates an exact match between predicted and actual, whereas values greater than one indicate a degree of "surprise", i.e. how much the generated token differs from the expected one. + +**[Kullback–Leibler (KL) Divergence][kld]:** a statistical measure of how much a probability distribution differs from another. When quantizing models (or altering the original tensors in any way for that matter), the closer we can preserve the weights' probability distribution to the original model the better, thus the closer to **0** the better. + +**[AI2 Reasoning Challenge (ARC)][arc]:** a benchmark to evaluate the ability of AI models to answer complex science questions that require logical reasoning beyond pattern matching. 
+ +**[GPQA-Diamond][gpqa]:** the 198-question, highest-quality subset of GPQA, a challenging dataset of 448 multiple-choice questions written by domain experts in biology, physics, and chemistry. + +**[HellaSwag][hsw]:** the Harder Endings, Longer contexts, and Low-shot Activities for Situations With Adversarial Generations (bit of a mouthful!) is a benchmark designed to test commonsense natural language inference. It requires the model to predict the most likely ending of a sentence. + +**[MMLU][mmlu]:** the Massive Multitask Language Understanding evaluates LLMs’ general knowledge and problem-solving abilities across 57 subjects, including elementary mathematics, US history, computer science, and law. + +**[Truthful QA][tqa]:** evaluates how well LLMs generate truthful responses to questions. It identifies whether AI models can avoid generating false or misleading information, particularly in areas where human knowledge is prone to misconceptions. + +**[WinoGrande][wng]:** based on the [Winograd Schema Challenge][wng-chl], is a natural language understanding task requiring models to resolve ambiguities in sentences involving pronoun references. + +## Credits +[LLaMa C++][llm] has a large and vibrant community of [contributors][llm-ctt] (~1,600 last time I checked) that actively maintain and extend its functionality, adding new models and architectures almost as fast as they appear. Considering the breakneck speed at which the AI/ML field is advancing, this alone is a remarkable feat! + +While I'm grateful to all contributors, I want to recognise three in particular: +* [Colin Kealty][btk] (Bartowski), for the many contributions and for being one of the best sources of high quality quantized models available on Hugging Face +* [Georgi Gerganov][ggg] for his amazing work with **llama.cpp** and the **ggml/gguf** libraries +* [Iwan Kawrakow][ikk] for being one of the key authors behind the many quantization algorithms and the imatrix functionality. 
+ +[arc]: https://llm-stats.com/benchmarks/ai2-reasoning-challenge-(arc) +[base]: https://huggingface.co/ibm-granite/granite-4.1-8b +[b-q4km]: https://huggingface.co/bartowski +[bch]: https://github.com/ggml-org/llama.cpp/tree/master/tools/llama-bench +[bf16]: https://en.wikipedia.org/wiki/Bfloat16_floating-point_format +[bpw]: https://github.com/ggml-org/llama.cpp/discussions/18531 +[btk]: https://huggingface.co/bartowski +[ggf]: https://huggingface.co/docs/hub/en/gguf +[ggg]: https://github.com/ggerganov +[gh]: https://github.com/EAddario/llama.cpp/tree/master +[gpqa]: https://arxiv.org/abs/2311.12022 +[gpqa-dat]: https://huggingface.co/datasets/eaddario/benchmark +[hsw-tst]: https://github.com/klosax/hellaswag_text_data +[hsw]: https://rowanzellers.com/hellaswag +[ical]: https://huggingface.co/datasets/eaddario/imatrix-calibration +[ikk]: https://github.com/ikawrakow +[imtx-pr]: https://github.com/ggml-org/llama.cpp/pull/14891 +[imx-dat]: https://huggingface.co/eaddario/granite-4.1-8b-GGUF/tree/main/imatrix +[imx]: https://github.com/EAddario/llama.cpp/tree/imatrix +[kld]: https://en.wikipedia.org/wiki/Kullback–Leibler_divergence +[lgt]: https://huggingface.co/eaddario/granite-4.1-8b-GGUF/tree/main/logits +[llm-ctt]: https://github.com/ggml-org/llama.cpp/graphs/contributors +[llm-rel]: https://github.com/ggml-org/llama.cpp/releases/tag/b9358 +[llm]: https://github.com/ggerganov/llama.cpp +[mdl]: https://huggingface.co/ibm-granite/granite-4.1-8b +[mmlu]: https://en.wikipedia.org/wiki/MMLU +[mrdx]: https://huggingface.co/datasets/Green-Sky/mmlu-redux-2.0-for-llama.cpp +[ppl]: https://github.com/ggml-org/llama.cpp/tree/master/tools/perplexity +[ppx]: https://huggingface.co/docs/transformers/en/perplexity +[qtz-pr]: https://github.com/ggml-org/llama.cpp/pull/15550 +[qtz]: https://github.com/EAddario/llama.cpp/tree/quantize +[sfts]: https://huggingface.co/docs/safetensors/en/index +[tqa]: https://github.com/sylinrl/TruthfulQA +[tst-dat]: 
https://huggingface.co/datasets/ikawrakow/validation-datasets-for-llama.cpp/tree/main +[u-q4km]: https://huggingface.co/unsloth +[ust-ai]: https://unsloth.ai +[ust]: https://huggingface.co/unsloth +[wki-dat]: https://huggingface.co/datasets/Salesforce/wikitext/tree/main/wikitext-2-raw-v1 +[wng-chl]: https://cdn.aaai.org/ocs/4492/4492-21843-1-PB.pdf +[wng-tst]: https://huggingface.co/datasets/ikawrakow/winogrande-eval-for-llama.cpp/tree/main +[wng]: https://winogrande.allenai.org diff --git a/granite-4.1-8b-F16.gguf b/granite-4.1-8b-F16.gguf new file mode 100644 index 0000000..6bd7fc2 --- /dev/null +++ b/granite-4.1-8b-F16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:041a4adb5827786916837f193e624ac4fced3e7c0eb9458708f8e39a50c80640 +size 17587417856 diff --git a/granite-4.1-8b-Q1_L.gguf b/granite-4.1-8b-Q1_L.gguf new file mode 100644 index 0000000..de145f4 --- /dev/null +++ b/granite-4.1-8b-Q1_L.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc3864de95c7f4d5568718625e58cf54d4046545d2bf397eb80970f0bfc7487 +size 1926722560 diff --git a/granite-4.1-8b-Q2_K.gguf b/granite-4.1-8b-Q2_K.gguf new file mode 100644 index 0000000..703641a --- /dev/null +++ b/granite-4.1-8b-Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c149301db1c2da459842becd06edee4d695bdaa98ab19b2e72a8e77dca283a6f +size 2750919680 diff --git a/granite-4.1-8b-Q3_K.gguf b/granite-4.1-8b-Q3_K.gguf new file mode 100644 index 0000000..0f66f49 --- /dev/null +++ b/granite-4.1-8b-Q3_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60b1874b8af548c0e7e265e025a1c0847afce50529defa47e090b2930cc0f2c1 +size 3849876480 diff --git a/granite-4.1-8b-Q4_K.gguf b/granite-4.1-8b-Q4_K.gguf new file mode 100644 index 0000000..4ca033a --- /dev/null +++ b/granite-4.1-8b-Q4_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:2253b2e0b33a5ae20ea3acfe4e2a0cc44e1ead7ca440997db710afc1f2e63005 +size 4948784128 diff --git a/granite-4.1-8b-Q5_K.gguf b/granite-4.1-8b-Q5_K.gguf new file mode 100644 index 0000000..b853bca --- /dev/null +++ b/granite-4.1-8b-Q5_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00f2df50b6bdb0b2e6c510f71cb502192b5271e55880f8a2a44a9d6736c5813d +size 6047708160 diff --git a/granite-4.1-8b-Q6_K.gguf b/granite-4.1-8b-Q6_K.gguf new file mode 100644 index 0000000..0cd1af1 --- /dev/null +++ b/granite-4.1-8b-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:292a55c8ffd7ae6daf0db87a4399c70ed557158766a17f21ae528ab6bf10a222 +size 7146566656 diff --git a/granite-4.1-8b-Q7_K.gguf b/granite-4.1-8b-Q7_K.gguf new file mode 100644 index 0000000..5523855 --- /dev/null +++ b/granite-4.1-8b-Q7_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a6e17af4482a0358522d65c8f0776605bdc565beaeefe998cb3975ef06a722d +size 8245441536 diff --git a/granite-4.1-8b-Q8_0.gguf b/granite-4.1-8b-Q8_0.gguf new file mode 100644 index 0000000..78ba3d9 --- /dev/null +++ b/granite-4.1-8b-Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7dcd5c8a7650336bdce6628d5e7645d33f73c0a3da6a679251d9b48d624a66d2 +size 9343267840 diff --git a/imatrix/imatrix-granite-4.1-8b-medium.gguf b/imatrix/imatrix-granite-4.1-8b-medium.gguf new file mode 100644 index 0000000..4481666 --- /dev/null +++ b/imatrix/imatrix-granite-4.1-8b-medium.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:884c70cf05568063f8e14655e063ead2ad52546f1421caab3a4ad0b4cc648601 +size 12054912 diff --git a/logits/granite-4.1-8b.logits b/logits/granite-4.1-8b.logits new file mode 100644 index 0000000..4a3e3ef --- /dev/null +++ b/logits/granite-4.1-8b.logits @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a822d9464e7d5cf600bf914065cb083783e20355f566609d0e2e0059b13e4f +size 
28905288788 diff --git a/scores/granite-4.1-8b-Q1_L.md b/scores/granite-4.1-8b-Q1_L.md new file mode 100644 index 0000000..8b9f5e8 --- /dev/null +++ b/scores/granite-4.1-8b-Q1_L.md @@ -0,0 +1,1247 @@ +# granite-4.1-8b-WIP/granite-4.1-8b-Q1_L.gguf - GGUF Internal File Dump + +- Endian: LITTLE endian + +## Key Value Metadata Store + +There are 42 key-value pairs in this file + +| POS | TYPE | Count | Key | Value | +|----:|:---------|-------:|:-----------------------------------------|:--------------------------------------------------------------------| +| 1 | UINT32 | 1 | GGUF.version | 3 | +| 2 | UINT64 | 1 | GGUF.tensor_count | 363 | +| 3 | UINT64 | 1 | GGUF.kv_count | 39 | +| 4 | STRING | 1 | general.architecture | `granite` | +| 5 | STRING | 1 | general.type | `model` | +| 6 | STRING | 1 | general.name | `Granite 4.1 8b` | +| 7 | STRING | 1 | general.basename | `granite-4.1` | +| 8 | STRING | 1 | general.size_label | `8B` | +| 9 | STRING | 1 | general.license | `apache-2.0` | +| 10 | [STRING] | 2 | general.tags | [ `language`, `granite-4.1` ] | +| 11 | UINT32 | 1 | granite.block_count | 40 | +| 12 | UINT32 | 1 | granite.context_length | 131072 | +| 13 | UINT32 | 1 | granite.embedding_length | 4096 | +| 14 | UINT32 | 1 | granite.feed_forward_length | 12800 | +| 15 | UINT32 | 1 | granite.attention.head_count | 32 | +| 16 | UINT32 | 1 | granite.attention.head_count_kv | 8 | +| 17 | FLOAT32 | 1 | granite.rope.freq_base | 10000000.0 | +| 18 | FLOAT32 | 1 | granite.attention.layer_norm_rms_epsilon | 1e-05 | +| 19 | UINT32 | 1 | granite.vocab_size | 100352 | +| 20 | UINT32 | 1 | granite.rope.dimension_count | 128 | +| 21 | FLOAT32 | 1 | granite.attention.scale | 0.0078125 | +| 22 | FLOAT32 | 1 | granite.embedding_scale | 12.0 | +| 23 | FLOAT32 | 1 | granite.residual_scale | 0.22 | +| 24 | FLOAT32 | 1 | granite.logit_scale | 16.0 | +| 25 | STRING | 1 | tokenizer.ggml.model | `gpt2` | +| 26 | STRING | 1 | tokenizer.ggml.pre | `granite-docling` | +| 27 | [STRING] | 100352 
| tokenizer.ggml.tokens | [ `!`, `"`, `#`, `$`, `%`, ... ] | +| 28 | [INT32] | 100352 | tokenizer.ggml.token_type | [ 1, 1, 1, 1, 1, 1, 1, ... ] | +| 29 | [STRING] | 100000 | tokenizer.ggml.merges | [ `Ġ Ġ`, `ĠĠ ĠĠ`, `i n`, `Ġ t`, `ĠĠĠĠ ĠĠĠĠ`, ... ] | +| 30 | UINT32 | 1 | tokenizer.ggml.bos_token_id | 100257 | +| 31 | UINT32 | 1 | tokenizer.ggml.eos_token_id | 100257 | +| 32 | UINT32 | 1 | tokenizer.ggml.unknown_token_id | 100269 | +| 33 | UINT32 | 1 | tokenizer.ggml.padding_token_id | 100256 | +| 34 | BOOL | 1 | tokenizer.ggml.add_bos_token | False | +| 35 | STRING | 1 | tokenizer.chat_template | `{%- set tools_system_message_p`...`end_of_role|>' }}{%- endif %}` | +| 36 | BOOL | 1 | tokenizer.ggml.add_space_prefix | False | +| 37 | UINT32 | 1 | general.quantization_version | 2 | +| 38 | UINT32 | 1 | general.file_type | 31 | +| 39 | STRING | 1 | quantize.imatrix.file | `imatrix/imatrix-granite-4.1-8b-medium.gguf` | +| 40 | STRING | 1 | quantize.imatrix.dataset | `../../datasets/imatrix/combined_all_medium.txt` | +| 41 | UINT32 | 1 | quantize.imatrix.entries_count | 280 | +| 42 | UINT32 | 1 | quantize.imatrix.chunks_count | 3601 | + +## Tensors Overview ~9B Elements + +Total number of elements in all tensors: 8791592960 Elements + +- [granite-4.1-8b-WIP/granite-4.1-8b-Q1\_L.gguf - GGUF Internal File Dump](#granite-41-8b-wipgranite-41-8b-q1_lgguf---gguf-internal-file-dump) + - [Key Value Metadata Store](#key-value-metadata-store) + - [Tensors Overview ~9B Elements](#tensors-overview-9b-elements) + - [Tensor Data Offset](#tensor-data-offset) + - [Base Tensor Group : ~822M Elements](#base-tensor-group--822m-elements) + - [Block 0 Tensor Group : ~199M Elements](#block-0-tensor-group--199m-elements) + - [Block 1 Tensor Group : ~199M Elements](#block-1-tensor-group--199m-elements) + - [Block 2 Tensor Group : ~199M Elements](#block-2-tensor-group--199m-elements) + - [Block 3 Tensor Group : ~199M Elements](#block-3-tensor-group--199m-elements) + - [Block 4 Tensor Group : 
~199M Elements](#block-4-tensor-group--199m-elements) + - [Block 5 Tensor Group : ~199M Elements](#block-5-tensor-group--199m-elements) + - [Block 6 Tensor Group : ~199M Elements](#block-6-tensor-group--199m-elements) + - [Block 7 Tensor Group : ~199M Elements](#block-7-tensor-group--199m-elements) + - [Block 8 Tensor Group : ~199M Elements](#block-8-tensor-group--199m-elements) + - [Block 9 Tensor Group : ~199M Elements](#block-9-tensor-group--199m-elements) + - [Block 10 Tensor Group : ~199M Elements](#block-10-tensor-group--199m-elements) + - [Block 11 Tensor Group : ~199M Elements](#block-11-tensor-group--199m-elements) + - [Block 12 Tensor Group : ~199M Elements](#block-12-tensor-group--199m-elements) + - [Block 13 Tensor Group : ~199M Elements](#block-13-tensor-group--199m-elements) + - [Block 14 Tensor Group : ~199M Elements](#block-14-tensor-group--199m-elements) + - [Block 15 Tensor Group : ~199M Elements](#block-15-tensor-group--199m-elements) + - [Block 16 Tensor Group : ~199M Elements](#block-16-tensor-group--199m-elements) + - [Block 17 Tensor Group : ~199M Elements](#block-17-tensor-group--199m-elements) + - [Block 18 Tensor Group : ~199M Elements](#block-18-tensor-group--199m-elements) + - [Block 19 Tensor Group : ~199M Elements](#block-19-tensor-group--199m-elements) + - [Block 20 Tensor Group : ~199M Elements](#block-20-tensor-group--199m-elements) + - [Block 21 Tensor Group : ~199M Elements](#block-21-tensor-group--199m-elements) + - [Block 22 Tensor Group : ~199M Elements](#block-22-tensor-group--199m-elements) + - [Block 23 Tensor Group : ~199M Elements](#block-23-tensor-group--199m-elements) + - [Block 24 Tensor Group : ~199M Elements](#block-24-tensor-group--199m-elements) + - [Block 25 Tensor Group : ~199M Elements](#block-25-tensor-group--199m-elements) + - [Block 26 Tensor Group : ~199M Elements](#block-26-tensor-group--199m-elements) + - [Block 27 Tensor Group : ~199M Elements](#block-27-tensor-group--199m-elements) + - [Block 28 Tensor 
Group : ~199M Elements](#block-28-tensor-group--199m-elements) + - [Block 29 Tensor Group : ~199M Elements](#block-29-tensor-group--199m-elements) + - [Block 30 Tensor Group : ~199M Elements](#block-30-tensor-group--199m-elements) + - [Block 31 Tensor Group : ~199M Elements](#block-31-tensor-group--199m-elements) + - [Block 32 Tensor Group : ~199M Elements](#block-32-tensor-group--199m-elements) + - [Block 33 Tensor Group : ~199M Elements](#block-33-tensor-group--199m-elements) + - [Block 34 Tensor Group : ~199M Elements](#block-34-tensor-group--199m-elements) + - [Block 35 Tensor Group : ~199M Elements](#block-35-tensor-group--199m-elements) + - [Block 36 Tensor Group : ~199M Elements](#block-36-tensor-group--199m-elements) + - [Block 37 Tensor Group : ~199M Elements](#block-37-tensor-group--199m-elements) + - [Block 38 Tensor Group : ~199M Elements](#block-38-tensor-group--199m-elements) + - [Block 39 Tensor Group : ~199M Elements](#block-39-tensor-group--199m-elements) + +### Tensor Data Offset + +This table contains the offset and data segment relative to start of file + +| T_ID | Tensor Layer Name | Data Offset (B) | Data Size (B) | +|-----:|:--------------------------|-----------------:|-----------------:| +| 0 | output.weight | 0x367400 | 0x80a0000 | +| 1 | output_norm.weight | 0x8407400 | 0x4000 | +| 2 | token_embd.weight | 0x840b400 | 0x80a0000 | +| 3 | blk.0.attn_k.weight | 0x104ab400 | 0xc8000 | +| 4 | blk.0.attn_norm.weight | 0x10573400 | 0x4000 | +| 5 | blk.0.attn_output.weight | 0x10577400 | 0x380000 | +| 6 | blk.0.attn_q.weight | 0x108f7400 | 0x320000 | +| 7 | blk.0.attn_v.weight | 0x10c17400 | 0xc8000 | +| 8 | blk.0.ffn_down.weight | 0x10cdf400 | 0x9c4000 | +| 9 | blk.0.ffn_gate.weight | 0x116a3400 | 0x9c4000 | +| 10 | blk.0.ffn_norm.weight | 0x12067400 | 0x4000 | +| 11 | blk.0.ffn_up.weight | 0x1206b400 | 0x9c4000 | +| 12 | blk.1.attn_k.weight | 0x12a2f400 | 0xc8000 | +| 13 | blk.1.attn_norm.weight | 0x12af7400 | 0x4000 | +| 14 | 
blk.1.attn_output.weight | 0x12afb400 | 0x4a0000 | +| 15 | blk.1.attn_q.weight | 0x12f9b400 | 0x320000 | +| 16 | blk.1.attn_v.weight | 0x132bb400 | 0xe0000 | +| 17 | blk.1.ffn_down.weight | 0x1339b400 | 0xce4000 | +| 18 | blk.1.ffn_gate.weight | 0x1407f400 | 0x9c4000 | +| 19 | blk.1.ffn_norm.weight | 0x14a43400 | 0x4000 | +| 20 | blk.1.ffn_up.weight | 0x14a47400 | 0x9c4000 | +| 21 | blk.2.attn_k.weight | 0x1540b400 | 0xc8000 | +| 22 | blk.2.attn_norm.weight | 0x154d3400 | 0x4000 | +| 23 | blk.2.attn_output.weight | 0x154d7400 | 0x520000 | +| 24 | blk.2.attn_q.weight | 0x159f7400 | 0x320000 | +| 25 | blk.2.attn_v.weight | 0x15d17400 | 0xc8000 | +| 26 | blk.2.ffn_down.weight | 0x15ddf400 | 0x9c4000 | +| 27 | blk.2.ffn_gate.weight | 0x167a3400 | 0x9c4000 | +| 28 | blk.2.ffn_norm.weight | 0x17167400 | 0x4000 | +| 29 | blk.2.ffn_up.weight | 0x1716b400 | 0x9c4000 | +| 30 | blk.3.attn_k.weight | 0x17b2f400 | 0xc8000 | +| 31 | blk.3.attn_norm.weight | 0x17bf7400 | 0x4000 | +| 32 | blk.3.attn_output.weight | 0x17bfb400 | 0x520000 | +| 33 | blk.3.attn_q.weight | 0x1811b400 | 0x320000 | +| 34 | blk.3.attn_v.weight | 0x1843b400 | 0xc8000 | +| 35 | blk.3.ffn_down.weight | 0x18503400 | 0x9c4000 | +| 36 | blk.3.ffn_gate.weight | 0x18ec7400 | 0x9c4000 | +| 37 | blk.3.ffn_norm.weight | 0x1988b400 | 0x4000 | +| 38 | blk.3.ffn_up.weight | 0x1988f400 | 0x9c4000 | +| 39 | blk.4.attn_k.weight | 0x1a253400 | 0xe0000 | +| 40 | blk.4.attn_norm.weight | 0x1a333400 | 0x4000 | +| 41 | blk.4.attn_output.weight | 0x1a337400 | 0x520000 | +| 42 | blk.4.attn_q.weight | 0x1a857400 | 0x380000 | +| 43 | blk.4.attn_v.weight | 0x1abd7400 | 0xe0000 | +| 44 | blk.4.ffn_down.weight | 0x1acb7400 | 0x9c4000 | +| 45 | blk.4.ffn_gate.weight | 0x1b67b400 | 0x9c4000 | +| 46 | blk.4.ffn_norm.weight | 0x1c03f400 | 0x4000 | +| 47 | blk.4.ffn_up.weight | 0x1c043400 | 0x9c4000 | +| 48 | blk.5.attn_k.weight | 0x1ca07400 | 0xc8000 | +| 49 | blk.5.attn_norm.weight | 0x1cacf400 | 0x4000 | +| 50 | 
blk.5.attn_output.weight | 0x1cad3400 | 0x520000 | +| 51 | blk.5.attn_q.weight | 0x1cff3400 | 0x320000 | +| 52 | blk.5.attn_v.weight | 0x1d313400 | 0xc8000 | +| 53 | blk.5.ffn_down.weight | 0x1d3db400 | 0xaf0000 | +| 54 | blk.5.ffn_gate.weight | 0x1decb400 | 0x9c4000 | +| 55 | blk.5.ffn_norm.weight | 0x1e88f400 | 0x4000 | +| 56 | blk.5.ffn_up.weight | 0x1e893400 | 0x9c4000 | +| 57 | blk.6.attn_k.weight | 0x1f257400 | 0xc8000 | +| 58 | blk.6.attn_norm.weight | 0x1f31f400 | 0x4000 | +| 59 | blk.6.attn_output.weight | 0x1f323400 | 0x4a0000 | +| 60 | blk.6.attn_q.weight | 0x1f7c3400 | 0x320000 | +| 61 | blk.6.attn_v.weight | 0x1fae3400 | 0xc8000 | +| 62 | blk.6.ffn_down.weight | 0x1fbab400 | 0x9c4000 | +| 63 | blk.6.ffn_gate.weight | 0x2056f400 | 0x9c4000 | +| 64 | blk.6.ffn_norm.weight | 0x20f33400 | 0x4000 | +| 65 | blk.6.ffn_up.weight | 0x20f37400 | 0x9c4000 | +| 66 | blk.7.attn_k.weight | 0x218fb400 | 0xe0000 | +| 67 | blk.7.attn_norm.weight | 0x219db400 | 0x4000 | +| 68 | blk.7.attn_output.weight | 0x219df400 | 0x4a0000 | +| 69 | blk.7.attn_q.weight | 0x21e7f400 | 0x380000 | +| 70 | blk.7.attn_v.weight | 0x221ff400 | 0xc8000 | +| 71 | blk.7.ffn_down.weight | 0x222c7400 | 0x9c4000 | +| 72 | blk.7.ffn_gate.weight | 0x22c8b400 | 0x9c4000 | +| 73 | blk.7.ffn_norm.weight | 0x2364f400 | 0x4000 | +| 74 | blk.7.ffn_up.weight | 0x23653400 | 0x9c4000 | +| 75 | blk.8.attn_k.weight | 0x24017400 | 0xc8000 | +| 76 | blk.8.attn_norm.weight | 0x240df400 | 0x4000 | +| 77 | blk.8.attn_output.weight | 0x240e3400 | 0x4a0000 | +| 78 | blk.8.attn_q.weight | 0x24583400 | 0x320000 | +| 79 | blk.8.attn_v.weight | 0x248a3400 | 0xc8000 | +| 80 | blk.8.ffn_down.weight | 0x2496b400 | 0x9c4000 | +| 81 | blk.8.ffn_gate.weight | 0x2532f400 | 0x9c4000 | +| 82 | blk.8.ffn_norm.weight | 0x25cf3400 | 0x4000 | +| 83 | blk.8.ffn_up.weight | 0x25cf7400 | 0x9c4000 | +| 84 | blk.9.attn_k.weight | 0x266bb400 | 0xc8000 | +| 85 | blk.9.attn_norm.weight | 0x26783400 | 0x4000 | +| 86 | 
blk.9.attn_output.weight | 0x26787400 | 0x520000 | +| 87 | blk.9.attn_q.weight | 0x26ca7400 | 0x320000 | +| 88 | blk.9.attn_v.weight | 0x26fc7400 | 0xc8000 | +| 89 | blk.9.ffn_down.weight | 0x2708f400 | 0x9c4000 | +| 90 | blk.9.ffn_gate.weight | 0x27a53400 | 0x9c4000 | +| 91 | blk.9.ffn_norm.weight | 0x28417400 | 0x4000 | +| 92 | blk.9.ffn_up.weight | 0x2841b400 | 0x9c4000 | +| 93 | blk.10.attn_k.weight | 0x28ddf400 | 0xc8000 | +| 94 | blk.10.attn_norm.weight | 0x28ea7400 | 0x4000 | +| 95 | blk.10.attn_output.weight | 0x28eab400 | 0x4a0000 | +| 96 | blk.10.attn_q.weight | 0x2934b400 | 0x320000 | +| 97 | blk.10.attn_v.weight | 0x2966b400 | 0xc8000 | +| 98 | blk.10.ffn_down.weight | 0x29733400 | 0x9c4000 | +| 99 | blk.10.ffn_gate.weight | 0x2a0f7400 | 0x9c4000 | +| 100 | blk.10.ffn_norm.weight | 0x2aabb400 | 0x4000 | +| 101 | blk.10.ffn_up.weight | 0x2aabf400 | 0x9c4000 | +| 102 | blk.11.attn_k.weight | 0x2b483400 | 0xc8000 | +| 103 | blk.11.attn_norm.weight | 0x2b54b400 | 0x4000 | +| 104 | blk.11.attn_output.weight | 0x2b54f400 | 0x4a0000 | +| 105 | blk.11.attn_q.weight | 0x2b9ef400 | 0x320000 | +| 106 | blk.11.attn_v.weight | 0x2bd0f400 | 0xc8000 | +| 107 | blk.11.ffn_down.weight | 0x2bdd7400 | 0x9c4000 | +| 108 | blk.11.ffn_gate.weight | 0x2c79b400 | 0x9c4000 | +| 109 | blk.11.ffn_norm.weight | 0x2d15f400 | 0x4000 | +| 110 | blk.11.ffn_up.weight | 0x2d163400 | 0x9c4000 | +| 111 | blk.12.attn_k.weight | 0x2db27400 | 0xc8000 | +| 112 | blk.12.attn_norm.weight | 0x2dbef400 | 0x4000 | +| 113 | blk.12.attn_output.weight | 0x2dbf3400 | 0x520000 | +| 114 | blk.12.attn_q.weight | 0x2e113400 | 0x320000 | +| 115 | blk.12.attn_v.weight | 0x2e433400 | 0xc8000 | +| 116 | blk.12.ffn_down.weight | 0x2e4fb400 | 0x9c4000 | +| 117 | blk.12.ffn_gate.weight | 0x2eebf400 | 0x9c4000 | +| 118 | blk.12.ffn_norm.weight | 0x2f883400 | 0x4000 | +| 119 | blk.12.ffn_up.weight | 0x2f887400 | 0x9c4000 | +| 120 | blk.13.attn_k.weight | 0x3024b400 | 0xc8000 | +| 121 | blk.13.attn_norm.weight | 
0x30313400 | 0x4000 | +| 122 | blk.13.attn_output.weight | 0x30317400 | 0x4a0000 | +| 123 | blk.13.attn_q.weight | 0x307b7400 | 0x320000 | +| 124 | blk.13.attn_v.weight | 0x30ad7400 | 0xc8000 | +| 125 | blk.13.ffn_down.weight | 0x30b9f400 | 0x9c4000 | +| 126 | blk.13.ffn_gate.weight | 0x31563400 | 0x9c4000 | +| 127 | blk.13.ffn_norm.weight | 0x31f27400 | 0x4000 | +| 128 | blk.13.ffn_up.weight | 0x31f2b400 | 0x9c4000 | +| 129 | blk.14.attn_k.weight | 0x328ef400 | 0xc8000 | +| 130 | blk.14.attn_norm.weight | 0x329b7400 | 0x4000 | +| 131 | blk.14.attn_output.weight | 0x329bb400 | 0x4a0000 | +| 132 | blk.14.attn_q.weight | 0x32e5b400 | 0x320000 | +| 133 | blk.14.attn_v.weight | 0x3317b400 | 0xc8000 | +| 134 | blk.14.ffn_down.weight | 0x33243400 | 0x9c4000 | +| 135 | blk.14.ffn_gate.weight | 0x33c07400 | 0x9c4000 | +| 136 | blk.14.ffn_norm.weight | 0x345cb400 | 0x4000 | +| 137 | blk.14.ffn_up.weight | 0x345cf400 | 0x9c4000 | +| 138 | blk.15.attn_k.weight | 0x34f93400 | 0xe0000 | +| 139 | blk.15.attn_norm.weight | 0x35073400 | 0x4000 | +| 140 | blk.15.attn_output.weight | 0x35077400 | 0x4a0000 | +| 141 | blk.15.attn_q.weight | 0x35517400 | 0x380000 | +| 142 | blk.15.attn_v.weight | 0x35897400 | 0xc8000 | +| 143 | blk.15.ffn_down.weight | 0x3595f400 | 0xaf0000 | +| 144 | blk.15.ffn_gate.weight | 0x3644f400 | 0x9c4000 | +| 145 | blk.15.ffn_norm.weight | 0x36e13400 | 0x4000 | +| 146 | blk.15.ffn_up.weight | 0x36e17400 | 0x9c4000 | +| 147 | blk.16.attn_k.weight | 0x377db400 | 0xe0000 | +| 148 | blk.16.attn_norm.weight | 0x378bb400 | 0x4000 | +| 149 | blk.16.attn_output.weight | 0x378bf400 | 0x4a0000 | +| 150 | blk.16.attn_q.weight | 0x37d5f400 | 0x380000 | +| 151 | blk.16.attn_v.weight | 0x380df400 | 0xc8000 | +| 152 | blk.16.ffn_down.weight | 0x381a7400 | 0xaf0000 | +| 153 | blk.16.ffn_gate.weight | 0x38c97400 | 0x9c4000 | +| 154 | blk.16.ffn_norm.weight | 0x3965b400 | 0x4000 | +| 155 | blk.16.ffn_up.weight | 0x3965f400 | 0x9c4000 | +| 156 | blk.17.attn_k.weight | 
0x3a023400 | 0xc8000 | +| 157 | blk.17.attn_norm.weight | 0x3a0eb400 | 0x4000 | +| 158 | blk.17.attn_output.weight | 0x3a0ef400 | 0x4a0000 | +| 159 | blk.17.attn_q.weight | 0x3a58f400 | 0x320000 | +| 160 | blk.17.attn_v.weight | 0x3a8af400 | 0xc8000 | +| 161 | blk.17.ffn_down.weight | 0x3a977400 | 0x9c4000 | +| 162 | blk.17.ffn_gate.weight | 0x3b33b400 | 0x9c4000 | +| 163 | blk.17.ffn_norm.weight | 0x3bcff400 | 0x4000 | +| 164 | blk.17.ffn_up.weight | 0x3bd03400 | 0x9c4000 | +| 165 | blk.18.attn_k.weight | 0x3c6c7400 | 0xc8000 | +| 166 | blk.18.attn_norm.weight | 0x3c78f400 | 0x4000 | +| 167 | blk.18.attn_output.weight | 0x3c793400 | 0x4a0000 | +| 168 | blk.18.attn_q.weight | 0x3cc33400 | 0x320000 | +| 169 | blk.18.attn_v.weight | 0x3cf53400 | 0xc8000 | +| 170 | blk.18.ffn_down.weight | 0x3d01b400 | 0xaf0000 | +| 171 | blk.18.ffn_gate.weight | 0x3db0b400 | 0x9c4000 | +| 172 | blk.18.ffn_norm.weight | 0x3e4cf400 | 0x4000 | +| 173 | blk.18.ffn_up.weight | 0x3e4d3400 | 0x9c4000 | +| 174 | blk.19.attn_k.weight | 0x3ee97400 | 0xc8000 | +| 175 | blk.19.attn_norm.weight | 0x3ef5f400 | 0x4000 | +| 176 | blk.19.attn_output.weight | 0x3ef63400 | 0x4a0000 | +| 177 | blk.19.attn_q.weight | 0x3f403400 | 0x320000 | +| 178 | blk.19.attn_v.weight | 0x3f723400 | 0xc8000 | +| 179 | blk.19.ffn_down.weight | 0x3f7eb400 | 0xaf0000 | +| 180 | blk.19.ffn_gate.weight | 0x402db400 | 0x9c4000 | +| 181 | blk.19.ffn_norm.weight | 0x40c9f400 | 0x4000 | +| 182 | blk.19.ffn_up.weight | 0x40ca3400 | 0x9c4000 | +| 183 | blk.20.attn_k.weight | 0x41667400 | 0xc8000 | +| 184 | blk.20.attn_norm.weight | 0x4172f400 | 0x4000 | +| 185 | blk.20.attn_output.weight | 0x41733400 | 0x520000 | +| 186 | blk.20.attn_q.weight | 0x41c53400 | 0x320000 | +| 187 | blk.20.attn_v.weight | 0x41f73400 | 0xc8000 | +| 188 | blk.20.ffn_down.weight | 0x4203b400 | 0x9c4000 | +| 189 | blk.20.ffn_gate.weight | 0x429ff400 | 0x9c4000 | +| 190 | blk.20.ffn_norm.weight | 0x433c3400 | 0x4000 | +| 191 | blk.20.ffn_up.weight | 
0x433c7400 | 0x9c4000 | +| 192 | blk.21.attn_k.weight | 0x43d8b400 | 0xc8000 | +| 193 | blk.21.attn_norm.weight | 0x43e53400 | 0x4000 | +| 194 | blk.21.attn_output.weight | 0x43e57400 | 0x520000 | +| 195 | blk.21.attn_q.weight | 0x44377400 | 0x320000 | +| 196 | blk.21.attn_v.weight | 0x44697400 | 0xc8000 | +| 197 | blk.21.ffn_down.weight | 0x4475f400 | 0xaf0000 | +| 198 | blk.21.ffn_gate.weight | 0x4524f400 | 0x9c4000 | +| 199 | blk.21.ffn_norm.weight | 0x45c13400 | 0x4000 | +| 200 | blk.21.ffn_up.weight | 0x45c17400 | 0x9c4000 | +| 201 | blk.22.attn_k.weight | 0x465db400 | 0xc8000 | +| 202 | blk.22.attn_norm.weight | 0x466a3400 | 0x4000 | +| 203 | blk.22.attn_output.weight | 0x466a7400 | 0x520000 | +| 204 | blk.22.attn_q.weight | 0x46bc7400 | 0x320000 | +| 205 | blk.22.attn_v.weight | 0x46ee7400 | 0xc8000 | +| 206 | blk.22.ffn_down.weight | 0x46faf400 | 0xaf0000 | +| 207 | blk.22.ffn_gate.weight | 0x47a9f400 | 0x9c4000 | +| 208 | blk.22.ffn_norm.weight | 0x48463400 | 0x4000 | +| 209 | blk.22.ffn_up.weight | 0x48467400 | 0x9c4000 | +| 210 | blk.23.attn_k.weight | 0x48e2b400 | 0xc8000 | +| 211 | blk.23.attn_norm.weight | 0x48ef3400 | 0x4000 | +| 212 | blk.23.attn_output.weight | 0x48ef7400 | 0x4a0000 | +| 213 | blk.23.attn_q.weight | 0x49397400 | 0x320000 | +| 214 | blk.23.attn_v.weight | 0x496b7400 | 0xc8000 | +| 215 | blk.23.ffn_down.weight | 0x4977f400 | 0x9c4000 | +| 216 | blk.23.ffn_gate.weight | 0x4a143400 | 0x9c4000 | +| 217 | blk.23.ffn_norm.weight | 0x4ab07400 | 0x4000 | +| 218 | blk.23.ffn_up.weight | 0x4ab0b400 | 0x9c4000 | +| 219 | blk.24.attn_k.weight | 0x4b4cf400 | 0xc8000 | +| 220 | blk.24.attn_norm.weight | 0x4b597400 | 0x4000 | +| 221 | blk.24.attn_output.weight | 0x4b59b400 | 0x520000 | +| 222 | blk.24.attn_q.weight | 0x4babb400 | 0x320000 | +| 223 | blk.24.attn_v.weight | 0x4bddb400 | 0xc8000 | +| 224 | blk.24.ffn_down.weight | 0x4bea3400 | 0xaf0000 | +| 225 | blk.24.ffn_gate.weight | 0x4c993400 | 0x9c4000 | +| 226 | blk.24.ffn_norm.weight | 
0x4d357400 | 0x4000 | +| 227 | blk.24.ffn_up.weight | 0x4d35b400 | 0x9c4000 | +| 228 | blk.25.attn_k.weight | 0x4dd1f400 | 0xc8000 | +| 229 | blk.25.attn_norm.weight | 0x4dde7400 | 0x4000 | +| 230 | blk.25.attn_output.weight | 0x4ddeb400 | 0x4a0000 | +| 231 | blk.25.attn_q.weight | 0x4e28b400 | 0x320000 | +| 232 | blk.25.attn_v.weight | 0x4e5ab400 | 0xc8000 | +| 233 | blk.25.ffn_down.weight | 0x4e673400 | 0xaf0000 | +| 234 | blk.25.ffn_gate.weight | 0x4f163400 | 0x9c4000 | +| 235 | blk.25.ffn_norm.weight | 0x4fb27400 | 0x4000 | +| 236 | blk.25.ffn_up.weight | 0x4fb2b400 | 0x9c4000 | +| 237 | blk.26.attn_k.weight | 0x504ef400 | 0xc8000 | +| 238 | blk.26.attn_norm.weight | 0x505b7400 | 0x4000 | +| 239 | blk.26.attn_output.weight | 0x505bb400 | 0x520000 | +| 240 | blk.26.attn_q.weight | 0x50adb400 | 0x320000 | +| 241 | blk.26.attn_v.weight | 0x50dfb400 | 0xc8000 | +| 242 | blk.26.ffn_down.weight | 0x50ec3400 | 0xaf0000 | +| 243 | blk.26.ffn_gate.weight | 0x519b3400 | 0x9c4000 | +| 244 | blk.26.ffn_norm.weight | 0x52377400 | 0x4000 | +| 245 | blk.26.ffn_up.weight | 0x5237b400 | 0x9c4000 | +| 246 | blk.27.attn_k.weight | 0x52d3f400 | 0xe0000 | +| 247 | blk.27.attn_norm.weight | 0x52e1f400 | 0x4000 | +| 248 | blk.27.attn_output.weight | 0x52e23400 | 0x4a0000 | +| 249 | blk.27.attn_q.weight | 0x532c3400 | 0x320000 | +| 250 | blk.27.attn_v.weight | 0x535e3400 | 0xc8000 | +| 251 | blk.27.ffn_down.weight | 0x536ab400 | 0x9c4000 | +| 252 | blk.27.ffn_gate.weight | 0x5406f400 | 0x9c4000 | +| 253 | blk.27.ffn_norm.weight | 0x54a33400 | 0x4000 | +| 254 | blk.27.ffn_up.weight | 0x54a37400 | 0x9c4000 | +| 255 | blk.28.attn_k.weight | 0x553fb400 | 0xe0000 | +| 256 | blk.28.attn_norm.weight | 0x554db400 | 0x4000 | +| 257 | blk.28.attn_output.weight | 0x554df400 | 0x4a0000 | +| 258 | blk.28.attn_q.weight | 0x5597f400 | 0x380000 | +| 259 | blk.28.attn_v.weight | 0x55cff400 | 0xc8000 | +| 260 | blk.28.ffn_down.weight | 0x55dc7400 | 0xaf0000 | +| 261 | blk.28.ffn_gate.weight | 
0x568b7400 | 0x9c4000 | +| 262 | blk.28.ffn_norm.weight | 0x5727b400 | 0x4000 | +| 263 | blk.28.ffn_up.weight | 0x5727f400 | 0x9c4000 | +| 264 | blk.29.attn_k.weight | 0x57c43400 | 0xc8000 | +| 265 | blk.29.attn_norm.weight | 0x57d0b400 | 0x4000 | +| 266 | blk.29.attn_output.weight | 0x57d0f400 | 0x4a0000 | +| 267 | blk.29.attn_q.weight | 0x581af400 | 0x320000 | +| 268 | blk.29.attn_v.weight | 0x584cf400 | 0xc8000 | +| 269 | blk.29.ffn_down.weight | 0x58597400 | 0x9c4000 | +| 270 | blk.29.ffn_gate.weight | 0x58f5b400 | 0x9c4000 | +| 271 | blk.29.ffn_norm.weight | 0x5991f400 | 0x4000 | +| 272 | blk.29.ffn_up.weight | 0x59923400 | 0x9c4000 | +| 273 | blk.30.attn_k.weight | 0x5a2e7400 | 0xe0000 | +| 274 | blk.30.attn_norm.weight | 0x5a3c7400 | 0x4000 | +| 275 | blk.30.attn_output.weight | 0x5a3cb400 | 0x4a0000 | +| 276 | blk.30.attn_q.weight | 0x5a86b400 | 0x380000 | +| 277 | blk.30.attn_v.weight | 0x5abeb400 | 0xc8000 | +| 278 | blk.30.ffn_down.weight | 0x5acb3400 | 0xaf0000 | +| 279 | blk.30.ffn_gate.weight | 0x5b7a3400 | 0x9c4000 | +| 280 | blk.30.ffn_norm.weight | 0x5c167400 | 0x4000 | +| 281 | blk.30.ffn_up.weight | 0x5c16b400 | 0x9c4000 | +| 282 | blk.31.attn_k.weight | 0x5cb2f400 | 0xc8000 | +| 283 | blk.31.attn_norm.weight | 0x5cbf7400 | 0x4000 | +| 284 | blk.31.attn_output.weight | 0x5cbfb400 | 0x520000 | +| 285 | blk.31.attn_q.weight | 0x5d11b400 | 0x320000 | +| 286 | blk.31.attn_v.weight | 0x5d43b400 | 0xc8000 | +| 287 | blk.31.ffn_down.weight | 0x5d503400 | 0xaf0000 | +| 288 | blk.31.ffn_gate.weight | 0x5dff3400 | 0x9c4000 | +| 289 | blk.31.ffn_norm.weight | 0x5e9b7400 | 0x4000 | +| 290 | blk.31.ffn_up.weight | 0x5e9bb400 | 0x9c4000 | +| 291 | blk.32.attn_k.weight | 0x5f37f400 | 0xc8000 | +| 292 | blk.32.attn_norm.weight | 0x5f447400 | 0x4000 | +| 293 | blk.32.attn_output.weight | 0x5f44b400 | 0x4a0000 | +| 294 | blk.32.attn_q.weight | 0x5f8eb400 | 0x320000 | +| 295 | blk.32.attn_v.weight | 0x5fc0b400 | 0xc8000 | +| 296 | blk.32.ffn_down.weight | 
0x5fcd3400 | 0xaf0000 | +| 297 | blk.32.ffn_gate.weight | 0x607c3400 | 0x9c4000 | +| 298 | blk.32.ffn_norm.weight | 0x61187400 | 0x4000 | +| 299 | blk.32.ffn_up.weight | 0x6118b400 | 0x9c4000 | +| 300 | blk.33.attn_k.weight | 0x61b4f400 | 0xc8000 | +| 301 | blk.33.attn_norm.weight | 0x61c17400 | 0x4000 | +| 302 | blk.33.attn_output.weight | 0x61c1b400 | 0x520000 | +| 303 | blk.33.attn_q.weight | 0x6213b400 | 0x320000 | +| 304 | blk.33.attn_v.weight | 0x6245b400 | 0xc8000 | +| 305 | blk.33.ffn_down.weight | 0x62523400 | 0x9c4000 | +| 306 | blk.33.ffn_gate.weight | 0x62ee7400 | 0x9c4000 | +| 307 | blk.33.ffn_norm.weight | 0x638ab400 | 0x4000 | +| 308 | blk.33.ffn_up.weight | 0x638af400 | 0x9c4000 | +| 309 | blk.34.attn_k.weight | 0x64273400 | 0xc8000 | +| 310 | blk.34.attn_norm.weight | 0x6433b400 | 0x4000 | +| 311 | blk.34.attn_output.weight | 0x6433f400 | 0x4a0000 | +| 312 | blk.34.attn_q.weight | 0x647df400 | 0x320000 | +| 313 | blk.34.attn_v.weight | 0x64aff400 | 0xc8000 | +| 314 | blk.34.ffn_down.weight | 0x64bc7400 | 0xaf0000 | +| 315 | blk.34.ffn_gate.weight | 0x656b7400 | 0x9c4000 | +| 316 | blk.34.ffn_norm.weight | 0x6607b400 | 0x4000 | +| 317 | blk.34.ffn_up.weight | 0x6607f400 | 0x9c4000 | +| 318 | blk.35.attn_k.weight | 0x66a43400 | 0xc8000 | +| 319 | blk.35.attn_norm.weight | 0x66b0b400 | 0x4000 | +| 320 | blk.35.attn_output.weight | 0x66b0f400 | 0x520000 | +| 321 | blk.35.attn_q.weight | 0x6702f400 | 0x320000 | +| 322 | blk.35.attn_v.weight | 0x6734f400 | 0xc8000 | +| 323 | blk.35.ffn_down.weight | 0x67417400 | 0x9c4000 | +| 324 | blk.35.ffn_gate.weight | 0x67ddb400 | 0x9c4000 | +| 325 | blk.35.ffn_norm.weight | 0x6879f400 | 0x4000 | +| 326 | blk.35.ffn_up.weight | 0x687a3400 | 0x9c4000 | +| 327 | blk.36.attn_k.weight | 0x69167400 | 0xc8000 | +| 328 | blk.36.attn_norm.weight | 0x6922f400 | 0x4000 | +| 329 | blk.36.attn_output.weight | 0x69233400 | 0x520000 | +| 330 | blk.36.attn_q.weight | 0x69753400 | 0x320000 | +| 331 | blk.36.attn_v.weight | 
0x69a73400 | 0xc8000 | +| 332 | blk.36.ffn_down.weight | 0x69b3b400 | 0x9c4000 | +| 333 | blk.36.ffn_gate.weight | 0x6a4ff400 | 0x9c4000 | +| 334 | blk.36.ffn_norm.weight | 0x6aec3400 | 0x4000 | +| 335 | blk.36.ffn_up.weight | 0x6aec7400 | 0x9c4000 | +| 336 | blk.37.attn_k.weight | 0x6b88b400 | 0xc8000 | +| 337 | blk.37.attn_norm.weight | 0x6b953400 | 0x4000 | +| 338 | blk.37.attn_output.weight | 0x6b957400 | 0x520000 | +| 339 | blk.37.attn_q.weight | 0x6be77400 | 0x320000 | +| 340 | blk.37.attn_v.weight | 0x6c197400 | 0xc8000 | +| 341 | blk.37.ffn_down.weight | 0x6c25f400 | 0x9c4000 | +| 342 | blk.37.ffn_gate.weight | 0x6cc23400 | 0x9c4000 | +| 343 | blk.37.ffn_norm.weight | 0x6d5e7400 | 0x4000 | +| 344 | blk.37.ffn_up.weight | 0x6d5eb400 | 0x9c4000 | +| 345 | blk.38.attn_k.weight | 0x6dfaf400 | 0xc8000 | +| 346 | blk.38.attn_norm.weight | 0x6e077400 | 0x4000 | +| 347 | blk.38.attn_output.weight | 0x6e07b400 | 0x520000 | +| 348 | blk.38.attn_q.weight | 0x6e59b400 | 0x320000 | +| 349 | blk.38.attn_v.weight | 0x6e8bb400 | 0xc8000 | +| 350 | blk.38.ffn_down.weight | 0x6e983400 | 0x9c4000 | +| 351 | blk.38.ffn_gate.weight | 0x6f347400 | 0x9c4000 | +| 352 | blk.38.ffn_norm.weight | 0x6fd0b400 | 0x4000 | +| 353 | blk.38.ffn_up.weight | 0x6fd0f400 | 0x9c4000 | +| 354 | blk.39.attn_k.weight | 0x706d3400 | 0xc8000 | +| 355 | blk.39.attn_norm.weight | 0x7079b400 | 0x4000 | +| 356 | blk.39.attn_output.weight | 0x7079f400 | 0x4a0000 | +| 357 | blk.39.attn_q.weight | 0x70c3f400 | 0x320000 | +| 358 | blk.39.attn_v.weight | 0x70f5f400 | 0xc8000 | +| 359 | blk.39.ffn_down.weight | 0x71027400 | 0x9c4000 | +| 360 | blk.39.ffn_gate.weight | 0x719eb400 | 0x9c4000 | +| 361 | blk.39.ffn_norm.weight | 0x723af400 | 0x4000 | +| 362 | blk.39.ffn_up.weight | 0x723b3400 | 0x9c4000 | + +### Base Tensor Group : ~822M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------|:---------------------------------|:------------------|:----------------------|:-----|--------:| +| 0 | output.weight | Output (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q2_K | 2.6250 | +| 1 | output_norm.weight | Output Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 2 | token_embd.weight | Token Embedding (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q2_K | 2.6250 | + +- Total elements in base: (~822M) 822087680 +- Percentage of total elements: 9.35% +- Bits per Weight (BPW) for base: 2.6251 bits + + +### Block 0 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:------|--------:| +| 3 | blk.0.attn_k.weight | Block 0 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 4 | blk.0.attn_norm.weight | Block 0 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 5 | blk.0.attn_output.weight | Block 0 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 6 | blk.0.attn_q.weight | Block 0 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 7 | blk.0.attn_v.weight | Block 0 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 8 | blk.0.ffn_down.weight | Block 0 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 9 | blk.0.ffn_gate.weight | Block 0 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 10 | blk.0.ffn_norm.weight | Block 0 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 11 | blk.0.ffn_up.weight | Block 0 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.0: 
(~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.0: 1.5795 bits + + +### Block 1 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 12 | blk.1.attn_k.weight | Block 1 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 13 | blk.1.attn_norm.weight | Block 1 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 14 | blk.1.attn_output.weight | Block 1 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 15 | blk.1.attn_q.weight | Block 1 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 16 | blk.1.attn_v.weight | Block 1 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_M | 1.7500 | +| 17 | blk.1.ffn_down.weight | Block 1 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_XXS | 2.0625 | +| 18 | blk.1.ffn_gate.weight | Block 1 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 19 | blk.1.ffn_norm.weight | Block 1 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 20 | blk.1.ffn_up.weight | Block 1 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.1: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.1: 1.7624 bits + + +### Block 2 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:------|--------:| +| 21 | blk.2.attn_k.weight | Block 2 Attention Key (W) | 
( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 22 | blk.2.attn_norm.weight | Block 2 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 23 | blk.2.attn_output.weight | Block 2 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 24 | blk.2.attn_q.weight | Block 2 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 25 | blk.2.attn_v.weight | Block 2 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 26 | blk.2.ffn_down.weight | Block 2 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 27 | blk.2.ffn_gate.weight | Block 2 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 28 | blk.2.ffn_norm.weight | Block 2 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 29 | blk.2.ffn_up.weight | Block 2 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.2: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.2: 1.6480 bits + + +### Block 3 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:------|--------:| +| 30 | blk.3.attn_k.weight | Block 3 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 31 | blk.3.attn_norm.weight | Block 3 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 32 | blk.3.attn_output.weight | Block 3 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 33 | blk.3.attn_q.weight | Block 3 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 34 | blk.3.attn_v.weight | Block 3 
Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 35 | blk.3.ffn_down.weight | Block 3 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 36 | blk.3.ffn_gate.weight | Block 3 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 37 | blk.3.ffn_norm.weight | Block 3 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 38 | blk.3.ffn_up.weight | Block 3 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.3: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.3: 1.6480 bits + + +### Block 4 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:------|--------:| +| 39 | blk.4.attn_k.weight | Block 4 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_M | 1.7500 | +| 40 | blk.4.attn_norm.weight | Block 4 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 41 | blk.4.attn_output.weight | Block 4 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 42 | blk.4.attn_q.weight | Block 4 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 43 | blk.4.attn_v.weight | Block 4 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_M | 1.7500 | +| 44 | blk.4.ffn_down.weight | Block 4 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 45 | blk.4.ffn_gate.weight | Block 4 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 46 | blk.4.ffn_norm.weight | Block 4 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 
| 32.0000 | +| 47 | blk.4.ffn_up.weight | Block 4 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.4: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.4: 1.6716 bits + + +### Block 5 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:------|--------:| +| 48 | blk.5.attn_k.weight | Block 5 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 49 | blk.5.attn_norm.weight | Block 5 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 50 | blk.5.attn_output.weight | Block 5 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 51 | blk.5.attn_q.weight | Block 5 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 52 | blk.5.attn_v.weight | Block 5 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 53 | blk.5.ffn_down.weight | Block 5 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 54 | blk.5.ffn_gate.weight | Block 5 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 55 | blk.5.ffn_norm.weight | Block 5 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 56 | blk.5.ffn_up.weight | Block 5 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.5: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.5: 1.6973 bits + + +### Block 6 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 57 | blk.6.attn_k.weight | Block 6 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 58 | blk.6.attn_norm.weight | Block 6 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 59 | blk.6.attn_output.weight | Block 6 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 60 | blk.6.attn_q.weight | Block 6 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 61 | blk.6.attn_v.weight | Block 6 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 62 | blk.6.ffn_down.weight | Block 6 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 63 | blk.6.ffn_gate.weight | Block 6 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 64 | blk.6.ffn_norm.weight | Block 6 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 65 | blk.6.ffn_up.weight | Block 6 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.6: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.6: 1.6269 bits + + +### Block 7 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 66 | blk.7.attn_k.weight | Block 7 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_M | 1.7500 | +| 67 | blk.7.attn_norm.weight | Block 7 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 68 | blk.7.attn_output.weight | Block 7 Attention Output (W) | (~17M) 
16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 69 | blk.7.attn_q.weight | Block 7 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 70 | blk.7.attn_v.weight | Block 7 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 71 | blk.7.ffn_down.weight | Block 7 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 72 | blk.7.ffn_gate.weight | Block 7 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 73 | blk.7.ffn_norm.weight | Block 7 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 74 | blk.7.ffn_up.weight | Block 7 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.7: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.7: 1.6466 bits + + +### Block 8 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 75 | blk.8.attn_k.weight | Block 8 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 76 | blk.8.attn_norm.weight | Block 8 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 77 | blk.8.attn_output.weight | Block 8 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 78 | blk.8.attn_q.weight | Block 8 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 79 | blk.8.attn_v.weight | Block 8 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 80 | blk.8.ffn_down.weight | Block 8 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 81 | blk.8.ffn_gate.weight | Block 8 
Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 82 | blk.8.ffn_norm.weight | Block 8 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 83 | blk.8.ffn_up.weight | Block 8 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.8: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.8: 1.6269 bits + + +### Block 9 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:------|--------:| +| 84 | blk.9.attn_k.weight | Block 9 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 85 | blk.9.attn_norm.weight | Block 9 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 86 | blk.9.attn_output.weight | Block 9 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 87 | blk.9.attn_q.weight | Block 9 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 88 | blk.9.attn_v.weight | Block 9 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 89 | blk.9.ffn_down.weight | Block 9 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 90 | blk.9.ffn_gate.weight | Block 9 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 91 | blk.9.ffn_norm.weight | Block 9 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 92 | blk.9.ffn_up.weight | Block 9 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.9: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight 
(BPW) for blk.9: 1.6480 bits + + +### Block 10 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 93 | blk.10.attn_k.weight | Block 10 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 94 | blk.10.attn_norm.weight | Block 10 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 95 | blk.10.attn_output.weight | Block 10 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 96 | blk.10.attn_q.weight | Block 10 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 97 | blk.10.attn_v.weight | Block 10 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 98 | blk.10.ffn_down.weight | Block 10 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 99 | blk.10.ffn_gate.weight | Block 10 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 100 | blk.10.ffn_norm.weight | Block 10 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 101 | blk.10.ffn_up.weight | Block 10 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.10: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.10: 1.6269 bits + + +### Block 11 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 102 | blk.11.attn_k.weight | Block 11 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 
1.5625 | +| 103 | blk.11.attn_norm.weight | Block 11 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 104 | blk.11.attn_output.weight | Block 11 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 105 | blk.11.attn_q.weight | Block 11 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 106 | blk.11.attn_v.weight | Block 11 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 107 | blk.11.ffn_down.weight | Block 11 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 108 | blk.11.ffn_gate.weight | Block 11 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 109 | blk.11.ffn_norm.weight | Block 11 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 110 | blk.11.ffn_up.weight | Block 11 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.11: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.11: 1.6269 bits + + +### Block 12 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 111 | blk.12.attn_k.weight | Block 12 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 112 | blk.12.attn_norm.weight | Block 12 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 113 | blk.12.attn_output.weight | Block 12 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 114 | blk.12.attn_q.weight | Block 12 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 115 | blk.12.attn_v.weight | Block 12 
Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 116 | blk.12.ffn_down.weight | Block 12 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 117 | blk.12.ffn_gate.weight | Block 12 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 118 | blk.12.ffn_norm.weight | Block 12 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 119 | blk.12.ffn_up.weight | Block 12 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.12: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.12: 1.6480 bits + + +### Block 13 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 120 | blk.13.attn_k.weight | Block 13 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 121 | blk.13.attn_norm.weight | Block 13 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 122 | blk.13.attn_output.weight | Block 13 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 123 | blk.13.attn_q.weight | Block 13 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 124 | blk.13.attn_v.weight | Block 13 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 125 | blk.13.ffn_down.weight | Block 13 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 126 | blk.13.ffn_gate.weight | Block 13 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 127 | blk.13.ffn_norm.weight | Block 13 Feed-Forward Network Normalization 
(W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 128 | blk.13.ffn_up.weight | Block 13 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.13: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.13: 1.6269 bits + + +### Block 14 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 129 | blk.14.attn_k.weight | Block 14 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 130 | blk.14.attn_norm.weight | Block 14 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 131 | blk.14.attn_output.weight | Block 14 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 132 | blk.14.attn_q.weight | Block 14 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 133 | blk.14.attn_v.weight | Block 14 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 134 | blk.14.ffn_down.weight | Block 14 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 135 | blk.14.ffn_gate.weight | Block 14 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 136 | blk.14.ffn_norm.weight | Block 14 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 137 | blk.14.ffn_up.weight | Block 14 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.14: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.14: 1.6269 bits + + +### Block 15 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor 
Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 138 | blk.15.attn_k.weight | Block 15 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_M | 1.7500 | +| 139 | blk.15.attn_norm.weight | Block 15 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 140 | blk.15.attn_output.weight | Block 15 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 141 | blk.15.attn_q.weight | Block 15 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 142 | blk.15.attn_v.weight | Block 15 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 143 | blk.15.ffn_down.weight | Block 15 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 144 | blk.15.ffn_gate.weight | Block 15 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 145 | blk.15.ffn_norm.weight | Block 15 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 146 | blk.15.ffn_up.weight | Block 15 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.15: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.15: 1.6960 bits + + +### Block 16 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 147 | blk.16.attn_k.weight | Block 16 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_M | 1.7500 | +| 148 | blk.16.attn_norm.weight | Block 16 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 
32.0000 | +| 149 | blk.16.attn_output.weight | Block 16 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 150 | blk.16.attn_q.weight | Block 16 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 151 | blk.16.attn_v.weight | Block 16 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 152 | blk.16.ffn_down.weight | Block 16 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 153 | blk.16.ffn_gate.weight | Block 16 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 154 | blk.16.ffn_norm.weight | Block 16 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 155 | blk.16.ffn_up.weight | Block 16 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.16: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.16: 1.6960 bits + + +### Block 17 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 156 | blk.17.attn_k.weight | Block 17 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 157 | blk.17.attn_norm.weight | Block 17 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 158 | blk.17.attn_output.weight | Block 17 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 159 | blk.17.attn_q.weight | Block 17 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 160 | blk.17.attn_v.weight | Block 17 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 161 | blk.17.ffn_down.weight | Block 17 
Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 162 | blk.17.ffn_gate.weight | Block 17 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 163 | blk.17.ffn_norm.weight | Block 17 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 164 | blk.17.ffn_up.weight | Block 17 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.17: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.17: 1.6269 bits + + +### Block 18 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 165 | blk.18.attn_k.weight | Block 18 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 166 | blk.18.attn_norm.weight | Block 18 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 167 | blk.18.attn_output.weight | Block 18 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 168 | blk.18.attn_q.weight | Block 18 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 169 | blk.18.attn_v.weight | Block 18 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 170 | blk.18.ffn_down.weight | Block 18 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 171 | blk.18.ffn_gate.weight | Block 18 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 172 | blk.18.ffn_norm.weight | Block 18 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 173 | blk.18.ffn_up.weight | Block 18 Feed-Forward Network "Up" 
(W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.18: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.18: 1.6762 bits + + +### Block 19 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 174 | blk.19.attn_k.weight | Block 19 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 175 | blk.19.attn_norm.weight | Block 19 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 176 | blk.19.attn_output.weight | Block 19 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 177 | blk.19.attn_q.weight | Block 19 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 178 | blk.19.attn_v.weight | Block 19 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 179 | blk.19.ffn_down.weight | Block 19 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 180 | blk.19.ffn_gate.weight | Block 19 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 181 | blk.19.ffn_norm.weight | Block 19 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 182 | blk.19.ffn_up.weight | Block 19 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.19: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.19: 1.6762 bits + + +### Block 20 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 183 | blk.20.attn_k.weight | Block 20 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 184 | blk.20.attn_norm.weight | Block 20 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 185 | blk.20.attn_output.weight | Block 20 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 186 | blk.20.attn_q.weight | Block 20 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 187 | blk.20.attn_v.weight | Block 20 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 188 | blk.20.ffn_down.weight | Block 20 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 189 | blk.20.ffn_gate.weight | Block 20 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 190 | blk.20.ffn_norm.weight | Block 20 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 191 | blk.20.ffn_up.weight | Block 20 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.20: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.20: 1.6480 bits + + +### Block 21 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 192 | blk.21.attn_k.weight | Block 21 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 193 | blk.21.attn_norm.weight | Block 21 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 194 | blk.21.attn_output.weight | 
Block 21 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 195 | blk.21.attn_q.weight | Block 21 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 196 | blk.21.attn_v.weight | Block 21 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 197 | blk.21.ffn_down.weight | Block 21 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 198 | blk.21.ffn_gate.weight | Block 21 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 199 | blk.21.ffn_norm.weight | Block 21 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 200 | blk.21.ffn_up.weight | Block 21 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.21: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.21: 1.6973 bits + + +### Block 22 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 201 | blk.22.attn_k.weight | Block 22 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 202 | blk.22.attn_norm.weight | Block 22 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 203 | blk.22.attn_output.weight | Block 22 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 204 | blk.22.attn_q.weight | Block 22 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 205 | blk.22.attn_v.weight | Block 22 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 206 | blk.22.ffn_down.weight | Block 22 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 
x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 207 | blk.22.ffn_gate.weight | Block 22 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 208 | blk.22.ffn_norm.weight | Block 22 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 209 | blk.22.ffn_up.weight | Block 22 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.22: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.22: 1.6973 bits + + +### Block 23 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 210 | blk.23.attn_k.weight | Block 23 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 211 | blk.23.attn_norm.weight | Block 23 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 212 | blk.23.attn_output.weight | Block 23 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 213 | blk.23.attn_q.weight | Block 23 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 214 | blk.23.attn_v.weight | Block 23 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 215 | blk.23.ffn_down.weight | Block 23 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 216 | blk.23.ffn_gate.weight | Block 23 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 217 | blk.23.ffn_norm.weight | Block 23 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 218 | blk.23.ffn_up.weight | Block 23 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 
1.5625 | + +- Total elements in blk.23: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.23: 1.6269 bits + + +### Block 24 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 219 | blk.24.attn_k.weight | Block 24 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 220 | blk.24.attn_norm.weight | Block 24 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 221 | blk.24.attn_output.weight | Block 24 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 222 | blk.24.attn_q.weight | Block 24 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 223 | blk.24.attn_v.weight | Block 24 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 224 | blk.24.ffn_down.weight | Block 24 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 225 | blk.24.ffn_gate.weight | Block 24 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 226 | blk.24.ffn_norm.weight | Block 24 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 227 | blk.24.ffn_up.weight | Block 24 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.24: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.24: 1.6973 bits + + +### Block 25 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 228 | blk.25.attn_k.weight | Block 25 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 229 | blk.25.attn_norm.weight | Block 25 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 230 | blk.25.attn_output.weight | Block 25 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 231 | blk.25.attn_q.weight | Block 25 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 232 | blk.25.attn_v.weight | Block 25 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 233 | blk.25.ffn_down.weight | Block 25 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 234 | blk.25.ffn_gate.weight | Block 25 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 235 | blk.25.ffn_norm.weight | Block 25 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 236 | blk.25.ffn_up.weight | Block 25 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.25: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.25: 1.6762 bits + + +### Block 26 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 237 | blk.26.attn_k.weight | Block 26 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 238 | blk.26.attn_norm.weight | Block 26 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 239 | blk.26.attn_output.weight | 
Block 26 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 240 | blk.26.attn_q.weight | Block 26 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 241 | blk.26.attn_v.weight | Block 26 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 242 | blk.26.ffn_down.weight | Block 26 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 243 | blk.26.ffn_gate.weight | Block 26 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 244 | blk.26.ffn_norm.weight | Block 26 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 245 | blk.26.ffn_up.weight | Block 26 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.26: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.26: 1.6973 bits + + +### Block 27 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 246 | blk.27.attn_k.weight | Block 27 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_M | 1.7500 | +| 247 | blk.27.attn_norm.weight | Block 27 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 248 | blk.27.attn_output.weight | Block 27 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 249 | blk.27.attn_q.weight | Block 27 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 250 | blk.27.attn_v.weight | Block 27 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 251 | blk.27.ffn_down.weight | Block 27 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 
12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 252 | blk.27.ffn_gate.weight | Block 27 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 253 | blk.27.ffn_norm.weight | Block 27 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 254 | blk.27.ffn_up.weight | Block 27 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.27: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.27: 1.6309 bits + + +### Block 28 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 255 | blk.28.attn_k.weight | Block 28 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_M | 1.7500 | +| 256 | blk.28.attn_norm.weight | Block 28 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 257 | blk.28.attn_output.weight | Block 28 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 258 | blk.28.attn_q.weight | Block 28 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 259 | blk.28.attn_v.weight | Block 28 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 260 | blk.28.ffn_down.weight | Block 28 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 261 | blk.28.ffn_gate.weight | Block 28 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 262 | blk.28.ffn_norm.weight | Block 28 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 263 | blk.28.ffn_up.weight | Block 28 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | 
IQ1_S | 1.5625 | + +- Total elements in blk.28: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.28: 1.6960 bits + + +### Block 29 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 264 | blk.29.attn_k.weight | Block 29 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 265 | blk.29.attn_norm.weight | Block 29 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 266 | blk.29.attn_output.weight | Block 29 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 267 | blk.29.attn_q.weight | Block 29 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 268 | blk.29.attn_v.weight | Block 29 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 269 | blk.29.ffn_down.weight | Block 29 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 270 | blk.29.ffn_gate.weight | Block 29 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 271 | blk.29.ffn_norm.weight | Block 29 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 272 | blk.29.ffn_up.weight | Block 29 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.29: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.29: 1.6269 bits + + +### Block 30 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 273 | blk.30.attn_k.weight | Block 30 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_M | 1.7500 | +| 274 | blk.30.attn_norm.weight | Block 30 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 275 | blk.30.attn_output.weight | Block 30 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 276 | blk.30.attn_q.weight | Block 30 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 277 | blk.30.attn_v.weight | Block 30 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 278 | blk.30.ffn_down.weight | Block 30 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 279 | blk.30.ffn_gate.weight | Block 30 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 280 | blk.30.ffn_norm.weight | Block 30 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 281 | blk.30.ffn_up.weight | Block 30 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.30: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.30: 1.6960 bits + + +### Block 31 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 282 | blk.31.attn_k.weight | Block 31 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 283 | blk.31.attn_norm.weight | Block 31 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 284 | blk.31.attn_output.weight | 
Block 31 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 285 | blk.31.attn_q.weight | Block 31 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 286 | blk.31.attn_v.weight | Block 31 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 287 | blk.31.ffn_down.weight | Block 31 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 288 | blk.31.ffn_gate.weight | Block 31 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 289 | blk.31.ffn_norm.weight | Block 31 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 290 | blk.31.ffn_up.weight | Block 31 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.31: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.31: 1.6973 bits + + +### Block 32 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 291 | blk.32.attn_k.weight | Block 32 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 292 | blk.32.attn_norm.weight | Block 32 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 293 | blk.32.attn_output.weight | Block 32 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 294 | blk.32.attn_q.weight | Block 32 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 295 | blk.32.attn_v.weight | Block 32 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 296 | blk.32.ffn_down.weight | Block 32 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 
12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 297 | blk.32.ffn_gate.weight | Block 32 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 298 | blk.32.ffn_norm.weight | Block 32 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 299 | blk.32.ffn_up.weight | Block 32 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.32: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.32: 1.6762 bits + + +### Block 33 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 300 | blk.33.attn_k.weight | Block 33 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 301 | blk.33.attn_norm.weight | Block 33 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 302 | blk.33.attn_output.weight | Block 33 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 303 | blk.33.attn_q.weight | Block 33 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 304 | blk.33.attn_v.weight | Block 33 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 305 | blk.33.ffn_down.weight | Block 33 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 306 | blk.33.ffn_gate.weight | Block 33 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 307 | blk.33.ffn_norm.weight | Block 33 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 308 | blk.33.ffn_up.weight | Block 33 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S 
| 1.5625 | + +- Total elements in blk.33: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.33: 1.6480 bits + + +### Block 34 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 309 | blk.34.attn_k.weight | Block 34 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 310 | blk.34.attn_norm.weight | Block 34 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 311 | blk.34.attn_output.weight | Block 34 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 312 | blk.34.attn_q.weight | Block 34 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 313 | blk.34.attn_v.weight | Block 34 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 314 | blk.34.ffn_down.weight | Block 34 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 315 | blk.34.ffn_gate.weight | Block 34 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 316 | blk.34.ffn_norm.weight | Block 34 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 317 | blk.34.ffn_up.weight | Block 34 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.34: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.34: 1.6762 bits + + +### Block 35 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 318 | blk.35.attn_k.weight | Block 35 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 319 | blk.35.attn_norm.weight | Block 35 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 320 | blk.35.attn_output.weight | Block 35 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 321 | blk.35.attn_q.weight | Block 35 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 322 | blk.35.attn_v.weight | Block 35 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 323 | blk.35.ffn_down.weight | Block 35 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 324 | blk.35.ffn_gate.weight | Block 35 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 325 | blk.35.ffn_norm.weight | Block 35 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 326 | blk.35.ffn_up.weight | Block 35 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.35: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.35: 1.6480 bits + + +### Block 36 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 327 | blk.36.attn_k.weight | Block 36 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 328 | blk.36.attn_norm.weight | Block 36 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 329 | blk.36.attn_output.weight | 
Block 36 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 330 | blk.36.attn_q.weight | Block 36 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 331 | blk.36.attn_v.weight | Block 36 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 332 | blk.36.ffn_down.weight | Block 36 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 333 | blk.36.ffn_gate.weight | Block 36 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 334 | blk.36.ffn_norm.weight | Block 36 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 335 | blk.36.ffn_up.weight | Block 36 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.36: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.36: 1.6480 bits + + +### Block 37 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 336 | blk.37.attn_k.weight | Block 37 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 337 | blk.37.attn_norm.weight | Block 37 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 338 | blk.37.attn_output.weight | Block 37 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 339 | blk.37.attn_q.weight | Block 37 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 340 | blk.37.attn_v.weight | Block 37 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 341 | blk.37.ffn_down.weight | Block 37 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 
x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 342 | blk.37.ffn_gate.weight | Block 37 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 343 | blk.37.ffn_norm.weight | Block 37 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 344 | blk.37.ffn_up.weight | Block 37 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.37: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.37: 1.6480 bits + + +### Block 38 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:------|--------:| +| 345 | blk.38.attn_k.weight | Block 38 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 346 | blk.38.attn_norm.weight | Block 38 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 347 | blk.38.attn_output.weight | Block 38 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 348 | blk.38.attn_q.weight | Block 38 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 349 | blk.38.attn_v.weight | Block 38 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 350 | blk.38.ffn_down.weight | Block 38 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 351 | blk.38.ffn_gate.weight | Block 38 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 352 | blk.38.ffn_norm.weight | Block 38 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 353 | blk.38.ffn_up.weight | Block 38 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 
1.5625 | + +- Total elements in blk.38: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.38: 1.6480 bits + + +### Block 39 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 354 | blk.39.attn_k.weight | Block 39 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 355 | blk.39.attn_norm.weight | Block 39 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 356 | blk.39.attn_output.weight | Block 39 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 357 | blk.39.attn_q.weight | Block 39 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 358 | blk.39.attn_v.weight | Block 39 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_S | 1.5625 | +| 359 | blk.39.ffn_down.weight | Block 39 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_S | 1.5625 | +| 360 | blk.39.ffn_gate.weight | Block 39 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 361 | blk.39.ffn_norm.weight | Block 39 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 362 | blk.39.ffn_up.weight | Block 39 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | + +- Total elements in blk.39: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.39: 1.6269 bits + + +Total BPW for granite-4.1-8b-Q1_L.gguf: 1.7500 bits diff --git a/scores/granite-4.1-8b-Q2_K.md b/scores/granite-4.1-8b-Q2_K.md new file mode 100644 index 0000000..7ffb036 --- /dev/null +++ b/scores/granite-4.1-8b-Q2_K.md @@ -0,0 +1,1247 @@ +# 
granite-4.1-8b-WIP/granite-4.1-8b-Q2_K.gguf - GGUF Internal File Dump + +- Endian: LITTLE endian + +## Key Value Metadata Store + +There are 42 key-value pairs in this file + +| POS | TYPE | Count | Key | Value | +|----:|:---------|-------:|:-----------------------------------------|:--------------------------------------------------------------------| +| 1 | UINT32 | 1 | GGUF.version | 3 | +| 2 | UINT64 | 1 | GGUF.tensor_count | 363 | +| 3 | UINT64 | 1 | GGUF.kv_count | 39 | +| 4 | STRING | 1 | general.architecture | `granite` | +| 5 | STRING | 1 | general.type | `model` | +| 6 | STRING | 1 | general.name | `Granite 4.1 8b` | +| 7 | STRING | 1 | general.basename | `granite-4.1` | +| 8 | STRING | 1 | general.size_label | `8B` | +| 9 | STRING | 1 | general.license | `apache-2.0` | +| 10 | [STRING] | 2 | general.tags | [ `language`, `granite-4.1` ] | +| 11 | UINT32 | 1 | granite.block_count | 40 | +| 12 | UINT32 | 1 | granite.context_length | 131072 | +| 13 | UINT32 | 1 | granite.embedding_length | 4096 | +| 14 | UINT32 | 1 | granite.feed_forward_length | 12800 | +| 15 | UINT32 | 1 | granite.attention.head_count | 32 | +| 16 | UINT32 | 1 | granite.attention.head_count_kv | 8 | +| 17 | FLOAT32 | 1 | granite.rope.freq_base | 10000000.0 | +| 18 | FLOAT32 | 1 | granite.attention.layer_norm_rms_epsilon | 1e-05 | +| 19 | UINT32 | 1 | granite.vocab_size | 100352 | +| 20 | UINT32 | 1 | granite.rope.dimension_count | 128 | +| 21 | FLOAT32 | 1 | granite.attention.scale | 0.0078125 | +| 22 | FLOAT32 | 1 | granite.embedding_scale | 12.0 | +| 23 | FLOAT32 | 1 | granite.residual_scale | 0.22 | +| 24 | FLOAT32 | 1 | granite.logit_scale | 16.0 | +| 25 | STRING | 1 | tokenizer.ggml.model | `gpt2` | +| 26 | STRING | 1 | tokenizer.ggml.pre | `granite-docling` | +| 27 | [STRING] | 100352 | tokenizer.ggml.tokens | [ `!`, `"`, `#`, `$`, `%`, ... ] | +| 28 | [INT32] | 100352 | tokenizer.ggml.token_type | [ 1, 1, 1, 1, 1, 1, 1, ... 
] | +| 29 | [STRING] | 100000 | tokenizer.ggml.merges | [ `Ġ Ġ`, `ĠĠ ĠĠ`, `i n`, `Ġ t`, `ĠĠĠĠ ĠĠĠĠ`, ... ] | +| 30 | UINT32 | 1 | tokenizer.ggml.bos_token_id | 100257 | +| 31 | UINT32 | 1 | tokenizer.ggml.eos_token_id | 100257 | +| 32 | UINT32 | 1 | tokenizer.ggml.unknown_token_id | 100269 | +| 33 | UINT32 | 1 | tokenizer.ggml.padding_token_id | 100256 | +| 34 | BOOL | 1 | tokenizer.ggml.add_bos_token | False | +| 35 | STRING | 1 | tokenizer.chat_template | `{%- set tools_system_message_p`...`end_of_role|>' }}{%- endif %}` | +| 36 | BOOL | 1 | tokenizer.ggml.add_space_prefix | False | +| 37 | UINT32 | 1 | general.quantization_version | 2 | +| 38 | UINT32 | 1 | general.file_type | 28 | +| 39 | STRING | 1 | quantize.imatrix.file | `imatrix/imatrix-granite-4.1-8b-medium.gguf` | +| 40 | STRING | 1 | quantize.imatrix.dataset | `../../datasets/imatrix/combined_all_medium.txt` | +| 41 | UINT32 | 1 | quantize.imatrix.entries_count | 280 | +| 42 | UINT32 | 1 | quantize.imatrix.chunks_count | 3601 | + +## Tensors Overview ~9B Elements + +Total number of elements in all tensors: 8791592960 Elements + +- [granite-4.1-8b-WIP/granite-4.1-8b-Q2\_K.gguf - GGUF Internal File Dump](#granite-41-8b-wipgranite-41-8b-q2_kgguf---gguf-internal-file-dump) + - [Key Value Metadata Store](#key-value-metadata-store) + - [Tensors Overview ~9B Elements](#tensors-overview-9b-elements) + - [Tensor Data Offset](#tensor-data-offset) + - [Base Tensor Group : ~822M Elements](#base-tensor-group--822m-elements) + - [Block 0 Tensor Group : ~199M Elements](#block-0-tensor-group--199m-elements) + - [Block 1 Tensor Group : ~199M Elements](#block-1-tensor-group--199m-elements) + - [Block 2 Tensor Group : ~199M Elements](#block-2-tensor-group--199m-elements) + - [Block 3 Tensor Group : ~199M Elements](#block-3-tensor-group--199m-elements) + - [Block 4 Tensor Group : ~199M Elements](#block-4-tensor-group--199m-elements) + - [Block 5 Tensor Group : ~199M Elements](#block-5-tensor-group--199m-elements) + - 
[Block 6 Tensor Group : ~199M Elements](#block-6-tensor-group--199m-elements) + - [Block 7 Tensor Group : ~199M Elements](#block-7-tensor-group--199m-elements) + - [Block 8 Tensor Group : ~199M Elements](#block-8-tensor-group--199m-elements) + - [Block 9 Tensor Group : ~199M Elements](#block-9-tensor-group--199m-elements) + - [Block 10 Tensor Group : ~199M Elements](#block-10-tensor-group--199m-elements) + - [Block 11 Tensor Group : ~199M Elements](#block-11-tensor-group--199m-elements) + - [Block 12 Tensor Group : ~199M Elements](#block-12-tensor-group--199m-elements) + - [Block 13 Tensor Group : ~199M Elements](#block-13-tensor-group--199m-elements) + - [Block 14 Tensor Group : ~199M Elements](#block-14-tensor-group--199m-elements) + - [Block 15 Tensor Group : ~199M Elements](#block-15-tensor-group--199m-elements) + - [Block 16 Tensor Group : ~199M Elements](#block-16-tensor-group--199m-elements) + - [Block 17 Tensor Group : ~199M Elements](#block-17-tensor-group--199m-elements) + - [Block 18 Tensor Group : ~199M Elements](#block-18-tensor-group--199m-elements) + - [Block 19 Tensor Group : ~199M Elements](#block-19-tensor-group--199m-elements) + - [Block 20 Tensor Group : ~199M Elements](#block-20-tensor-group--199m-elements) + - [Block 21 Tensor Group : ~199M Elements](#block-21-tensor-group--199m-elements) + - [Block 22 Tensor Group : ~199M Elements](#block-22-tensor-group--199m-elements) + - [Block 23 Tensor Group : ~199M Elements](#block-23-tensor-group--199m-elements) + - [Block 24 Tensor Group : ~199M Elements](#block-24-tensor-group--199m-elements) + - [Block 25 Tensor Group : ~199M Elements](#block-25-tensor-group--199m-elements) + - [Block 26 Tensor Group : ~199M Elements](#block-26-tensor-group--199m-elements) + - [Block 27 Tensor Group : ~199M Elements](#block-27-tensor-group--199m-elements) + - [Block 28 Tensor Group : ~199M Elements](#block-28-tensor-group--199m-elements) + - [Block 29 Tensor Group : ~199M 
Elements](#block-29-tensor-group--199m-elements) + - [Block 30 Tensor Group : ~199M Elements](#block-30-tensor-group--199m-elements) + - [Block 31 Tensor Group : ~199M Elements](#block-31-tensor-group--199m-elements) + - [Block 32 Tensor Group : ~199M Elements](#block-32-tensor-group--199m-elements) + - [Block 33 Tensor Group : ~199M Elements](#block-33-tensor-group--199m-elements) + - [Block 34 Tensor Group : ~199M Elements](#block-34-tensor-group--199m-elements) + - [Block 35 Tensor Group : ~199M Elements](#block-35-tensor-group--199m-elements) + - [Block 36 Tensor Group : ~199M Elements](#block-36-tensor-group--199m-elements) + - [Block 37 Tensor Group : ~199M Elements](#block-37-tensor-group--199m-elements) + - [Block 38 Tensor Group : ~199M Elements](#block-38-tensor-group--199m-elements) + - [Block 39 Tensor Group : ~199M Elements](#block-39-tensor-group--199m-elements) + +### Tensor Data Offset + +This table contains the offset and data segment relative to start of file + +| T_ID | Tensor Layer Name | Data Offset (B) | Data Size (B) | +|-----:|:--------------------------|-----------------:|-----------------:| +| 0 | output.weight | 0x367400 | 0xa870000 | +| 1 | output_norm.weight | 0xabd7400 | 0x4000 | +| 2 | token_embd.weight | 0xabdb400 | 0x80a0000 | +| 3 | blk.0.attn_k.weight | 0x12c7b400 | 0xe0000 | +| 4 | blk.0.attn_norm.weight | 0x12d5b400 | 0x4000 | +| 5 | blk.0.attn_output.weight | 0x12d5f400 | 0x540000 | +| 6 | blk.0.attn_q.weight | 0x1329f400 | 0x380000 | +| 7 | blk.0.attn_v.weight | 0x1361f400 | 0xe0000 | +| 8 | blk.0.ffn_down.weight | 0x136ff400 | 0xaf0000 | +| 9 | blk.0.ffn_gate.weight | 0x141ef400 | 0x9c4000 | +| 10 | blk.0.ffn_norm.weight | 0x14bb3400 | 0x4000 | +| 11 | blk.0.ffn_up.weight | 0x14bb7400 | 0xe74000 | +| 12 | blk.1.attn_k.weight | 0x15a2b400 | 0x128000 | +| 13 | blk.1.attn_norm.weight | 0x15b53400 | 0x4000 | +| 14 | blk.1.attn_output.weight | 0x15b57400 | 0x6e0000 | +| 15 | blk.1.attn_q.weight | 0x16237400 | 0x4a0000 | +| 16 | 
blk.1.attn_v.weight | 0x166d7400 | 0x188000 | +| 17 | blk.1.ffn_down.weight | 0x1685f400 | 0x1004000 | +| 18 | blk.1.ffn_gate.weight | 0x17863400 | 0xce4000 | +| 19 | blk.1.ffn_norm.weight | 0x18547400 | 0x4000 | +| 20 | blk.1.ffn_up.weight | 0x1854b400 | 0x1004000 | +| 21 | blk.2.attn_k.weight | 0x1954f400 | 0x128000 | +| 22 | blk.2.attn_norm.weight | 0x19677400 | 0x4000 | +| 23 | blk.2.attn_output.weight | 0x1967b400 | 0x6e0000 | +| 24 | blk.2.attn_q.weight | 0x19d5b400 | 0x520000 | +| 25 | blk.2.attn_v.weight | 0x1a27b400 | 0x188000 | +| 26 | blk.2.ffn_down.weight | 0x1a403400 | 0x1004000 | +| 27 | blk.2.ffn_gate.weight | 0x1b407400 | 0xce4000 | +| 28 | blk.2.ffn_norm.weight | 0x1c0eb400 | 0x4000 | +| 29 | blk.2.ffn_up.weight | 0x1c0ef400 | 0x1004000 | +| 30 | blk.3.attn_k.weight | 0x1d0f3400 | 0x108000 | +| 31 | blk.3.attn_norm.weight | 0x1d1fb400 | 0x4000 | +| 32 | blk.3.attn_output.weight | 0x1d1ff400 | 0x6e0000 | +| 33 | blk.3.attn_q.weight | 0x1d8df400 | 0x4a0000 | +| 34 | blk.3.attn_v.weight | 0x1dd7f400 | 0x188000 | +| 35 | blk.3.ffn_down.weight | 0x1df07400 | 0xce4000 | +| 36 | blk.3.ffn_gate.weight | 0x1ebeb400 | 0xce4000 | +| 37 | blk.3.ffn_norm.weight | 0x1f8cf400 | 0x4000 | +| 38 | blk.3.ffn_up.weight | 0x1f8d3400 | 0xe74000 | +| 39 | blk.4.attn_k.weight | 0x20747400 | 0x128000 | +| 40 | blk.4.attn_norm.weight | 0x2086f400 | 0x4000 | +| 41 | blk.4.attn_output.weight | 0x20873400 | 0x6e0000 | +| 42 | blk.4.attn_q.weight | 0x20f53400 | 0x520000 | +| 43 | blk.4.attn_v.weight | 0x21473400 | 0x188000 | +| 44 | blk.4.ffn_down.weight | 0x215fb400 | 0x1004000 | +| 45 | blk.4.ffn_gate.weight | 0x225ff400 | 0xce4000 | +| 46 | blk.4.ffn_norm.weight | 0x232e3400 | 0x4000 | +| 47 | blk.4.ffn_up.weight | 0x232e7400 | 0xce4000 | +| 48 | blk.5.attn_k.weight | 0x23fcb400 | 0x128000 | +| 49 | blk.5.attn_norm.weight | 0x240f3400 | 0x4000 | +| 50 | blk.5.attn_output.weight | 0x240f7400 | 0x6e0000 | +| 51 | blk.5.attn_q.weight | 0x247d7400 | 0x520000 | +| 52 | 
blk.5.attn_v.weight | 0x24cf7400 | 0x150000 | +| 53 | blk.5.ffn_down.weight | 0x24e47400 | 0x1068000 | +| 54 | blk.5.ffn_gate.weight | 0x25eaf400 | 0xce4000 | +| 55 | blk.5.ffn_norm.weight | 0x26b93400 | 0x4000 | +| 56 | blk.5.ffn_up.weight | 0x26b97400 | 0xe74000 | +| 57 | blk.6.attn_k.weight | 0x27a0b400 | 0x128000 | +| 58 | blk.6.attn_norm.weight | 0x27b33400 | 0x4000 | +| 59 | blk.6.attn_output.weight | 0x27b37400 | 0x6e0000 | +| 60 | blk.6.attn_q.weight | 0x28217400 | 0x4a0000 | +| 61 | blk.6.attn_v.weight | 0x286b7400 | 0x150000 | +| 62 | blk.6.ffn_down.weight | 0x28807400 | 0x1004000 | +| 63 | blk.6.ffn_gate.weight | 0x2980b400 | 0xce4000 | +| 64 | blk.6.ffn_norm.weight | 0x2a4ef400 | 0x4000 | +| 65 | blk.6.ffn_up.weight | 0x2a4f3400 | 0xe74000 | +| 66 | blk.7.attn_k.weight | 0x2b367400 | 0x148000 | +| 67 | blk.7.attn_norm.weight | 0x2b4af400 | 0x4000 | +| 68 | blk.7.attn_output.weight | 0x2b4b3400 | 0x6e0000 | +| 69 | blk.7.attn_q.weight | 0x2bb93400 | 0x520000 | +| 70 | blk.7.attn_v.weight | 0x2c0b3400 | 0x150000 | +| 71 | blk.7.ffn_down.weight | 0x2c203400 | 0x1004000 | +| 72 | blk.7.ffn_gate.weight | 0x2d207400 | 0xce4000 | +| 73 | blk.7.ffn_norm.weight | 0x2deeb400 | 0x4000 | +| 74 | blk.7.ffn_up.weight | 0x2deef400 | 0x1004000 | +| 75 | blk.8.attn_k.weight | 0x2eef3400 | 0x128000 | +| 76 | blk.8.attn_norm.weight | 0x2f01b400 | 0x4000 | +| 77 | blk.8.attn_output.weight | 0x2f01f400 | 0x6e0000 | +| 78 | blk.8.attn_q.weight | 0x2f6ff400 | 0x520000 | +| 79 | blk.8.attn_v.weight | 0x2fc1f400 | 0x150000 | +| 80 | blk.8.ffn_down.weight | 0x2fd6f400 | 0x1004000 | +| 81 | blk.8.ffn_gate.weight | 0x30d73400 | 0xce4000 | +| 82 | blk.8.ffn_norm.weight | 0x31a57400 | 0x4000 | +| 83 | blk.8.ffn_up.weight | 0x31a5b400 | 0x1004000 | +| 84 | blk.9.attn_k.weight | 0x32a5f400 | 0x128000 | +| 85 | blk.9.attn_norm.weight | 0x32b87400 | 0x4000 | +| 86 | blk.9.attn_output.weight | 0x32b8b400 | 0x6e0000 | +| 87 | blk.9.attn_q.weight | 0x3326b400 | 0x520000 | +| 88 | 
blk.9.attn_v.weight | 0x3378b400 | 0x150000 | +| 89 | blk.9.ffn_down.weight | 0x338db400 | 0x1004000 | +| 90 | blk.9.ffn_gate.weight | 0x348df400 | 0xce4000 | +| 91 | blk.9.ffn_norm.weight | 0x355c3400 | 0x4000 | +| 92 | blk.9.ffn_up.weight | 0x355c7400 | 0x1004000 | +| 93 | blk.10.attn_k.weight | 0x365cb400 | 0x128000 | +| 94 | blk.10.attn_norm.weight | 0x366f3400 | 0x4000 | +| 95 | blk.10.attn_output.weight | 0x366f7400 | 0x6e0000 | +| 96 | blk.10.attn_q.weight | 0x36dd7400 | 0x520000 | +| 97 | blk.10.attn_v.weight | 0x372f7400 | 0x150000 | +| 98 | blk.10.ffn_down.weight | 0x37447400 | 0x1004000 | +| 99 | blk.10.ffn_gate.weight | 0x3844b400 | 0xce4000 | +| 100 | blk.10.ffn_norm.weight | 0x3912f400 | 0x4000 | +| 101 | blk.10.ffn_up.weight | 0x39133400 | 0x1004000 | +| 102 | blk.11.attn_k.weight | 0x3a137400 | 0x128000 | +| 103 | blk.11.attn_norm.weight | 0x3a25f400 | 0x4000 | +| 104 | blk.11.attn_output.weight | 0x3a263400 | 0x6e0000 | +| 105 | blk.11.attn_q.weight | 0x3a943400 | 0x520000 | +| 106 | blk.11.attn_v.weight | 0x3ae63400 | 0x150000 | +| 107 | blk.11.ffn_down.weight | 0x3afb3400 | 0x1004000 | +| 108 | blk.11.ffn_gate.weight | 0x3bfb7400 | 0x9c4000 | +| 109 | blk.11.ffn_norm.weight | 0x3c97b400 | 0x4000 | +| 110 | blk.11.ffn_up.weight | 0x3c97f400 | 0x1004000 | +| 111 | blk.12.attn_k.weight | 0x3d983400 | 0x128000 | +| 112 | blk.12.attn_norm.weight | 0x3daab400 | 0x4000 | +| 113 | blk.12.attn_output.weight | 0x3daaf400 | 0x6e0000 | +| 114 | blk.12.attn_q.weight | 0x3e18f400 | 0x520000 | +| 115 | blk.12.attn_v.weight | 0x3e6af400 | 0x150000 | +| 116 | blk.12.ffn_down.weight | 0x3e7ff400 | 0x1004000 | +| 117 | blk.12.ffn_gate.weight | 0x3f803400 | 0xce4000 | +| 118 | blk.12.ffn_norm.weight | 0x404e7400 | 0x4000 | +| 119 | blk.12.ffn_up.weight | 0x404eb400 | 0x1004000 | +| 120 | blk.13.attn_k.weight | 0x414ef400 | 0x128000 | +| 121 | blk.13.attn_norm.weight | 0x41617400 | 0x4000 | +| 122 | blk.13.attn_output.weight | 0x4161b400 | 0x6e0000 | +| 123 | 
blk.13.attn_q.weight | 0x41cfb400 | 0x520000 | +| 124 | blk.13.attn_v.weight | 0x4221b400 | 0x150000 | +| 125 | blk.13.ffn_down.weight | 0x4236b400 | 0x1004000 | +| 126 | blk.13.ffn_gate.weight | 0x4336f400 | 0xce4000 | +| 127 | blk.13.ffn_norm.weight | 0x44053400 | 0x4000 | +| 128 | blk.13.ffn_up.weight | 0x44057400 | 0x1004000 | +| 129 | blk.14.attn_k.weight | 0x4505b400 | 0x128000 | +| 130 | blk.14.attn_norm.weight | 0x45183400 | 0x4000 | +| 131 | blk.14.attn_output.weight | 0x45187400 | 0x6e0000 | +| 132 | blk.14.attn_q.weight | 0x45867400 | 0x520000 | +| 133 | blk.14.attn_v.weight | 0x45d87400 | 0x150000 | +| 134 | blk.14.ffn_down.weight | 0x45ed7400 | 0x1004000 | +| 135 | blk.14.ffn_gate.weight | 0x46edb400 | 0xce4000 | +| 136 | blk.14.ffn_norm.weight | 0x47bbf400 | 0x4000 | +| 137 | blk.14.ffn_up.weight | 0x47bc3400 | 0x1004000 | +| 138 | blk.15.attn_k.weight | 0x48bc7400 | 0x148000 | +| 139 | blk.15.attn_norm.weight | 0x48d0f400 | 0x4000 | +| 140 | blk.15.attn_output.weight | 0x48d13400 | 0x6e0000 | +| 141 | blk.15.attn_q.weight | 0x493f3400 | 0x520000 | +| 142 | blk.15.attn_v.weight | 0x49913400 | 0x150000 | +| 143 | blk.15.ffn_down.weight | 0x49a63400 | 0x1004000 | +| 144 | blk.15.ffn_gate.weight | 0x4aa67400 | 0xce4000 | +| 145 | blk.15.ffn_norm.weight | 0x4b74b400 | 0x4000 | +| 146 | blk.15.ffn_up.weight | 0x4b74f400 | 0x1004000 | +| 147 | blk.16.attn_k.weight | 0x4c753400 | 0x148000 | +| 148 | blk.16.attn_norm.weight | 0x4c89b400 | 0x4000 | +| 149 | blk.16.attn_output.weight | 0x4c89f400 | 0x620000 | +| 150 | blk.16.attn_q.weight | 0x4cebf400 | 0x520000 | +| 151 | blk.16.attn_v.weight | 0x4d3df400 | 0x150000 | +| 152 | blk.16.ffn_down.weight | 0x4d52f400 | 0x1004000 | +| 153 | blk.16.ffn_gate.weight | 0x4e533400 | 0xce4000 | +| 154 | blk.16.ffn_norm.weight | 0x4f217400 | 0x4000 | +| 155 | blk.16.ffn_up.weight | 0x4f21b400 | 0x1004000 | +| 156 | blk.17.attn_k.weight | 0x5021f400 | 0x128000 | +| 157 | blk.17.attn_norm.weight | 0x50347400 | 0x4000 | +| 
158 | blk.17.attn_output.weight | 0x5034b400 | 0x6e0000 | +| 159 | blk.17.attn_q.weight | 0x50a2b400 | 0x520000 | +| 160 | blk.17.attn_v.weight | 0x50f4b400 | 0x150000 | +| 161 | blk.17.ffn_down.weight | 0x5109b400 | 0x1004000 | +| 162 | blk.17.ffn_gate.weight | 0x5209f400 | 0xce4000 | +| 163 | blk.17.ffn_norm.weight | 0x52d83400 | 0x4000 | +| 164 | blk.17.ffn_up.weight | 0x52d87400 | 0x1004000 | +| 165 | blk.18.attn_k.weight | 0x53d8b400 | 0x128000 | +| 166 | blk.18.attn_norm.weight | 0x53eb3400 | 0x4000 | +| 167 | blk.18.attn_output.weight | 0x53eb7400 | 0x6e0000 | +| 168 | blk.18.attn_q.weight | 0x54597400 | 0x520000 | +| 169 | blk.18.attn_v.weight | 0x54ab7400 | 0x150000 | +| 170 | blk.18.ffn_down.weight | 0x54c07400 | 0x1004000 | +| 171 | blk.18.ffn_gate.weight | 0x55c0b400 | 0xce4000 | +| 172 | blk.18.ffn_norm.weight | 0x568ef400 | 0x4000 | +| 173 | blk.18.ffn_up.weight | 0x568f3400 | 0x1004000 | +| 174 | blk.19.attn_k.weight | 0x578f7400 | 0x128000 | +| 175 | blk.19.attn_norm.weight | 0x57a1f400 | 0x4000 | +| 176 | blk.19.attn_output.weight | 0x57a23400 | 0x6e0000 | +| 177 | blk.19.attn_q.weight | 0x58103400 | 0x520000 | +| 178 | blk.19.attn_v.weight | 0x58623400 | 0x150000 | +| 179 | blk.19.ffn_down.weight | 0x58773400 | 0x1004000 | +| 180 | blk.19.ffn_gate.weight | 0x59777400 | 0xce4000 | +| 181 | blk.19.ffn_norm.weight | 0x5a45b400 | 0x4000 | +| 182 | blk.19.ffn_up.weight | 0x5a45f400 | 0x1004000 | +| 183 | blk.20.attn_k.weight | 0x5b463400 | 0x148000 | +| 184 | blk.20.attn_norm.weight | 0x5b5ab400 | 0x4000 | +| 185 | blk.20.attn_output.weight | 0x5b5af400 | 0x6e0000 | +| 186 | blk.20.attn_q.weight | 0x5bc8f400 | 0x520000 | +| 187 | blk.20.attn_v.weight | 0x5c1af400 | 0x150000 | +| 188 | blk.20.ffn_down.weight | 0x5c2ff400 | 0x1004000 | +| 189 | blk.20.ffn_gate.weight | 0x5d303400 | 0xce4000 | +| 190 | blk.20.ffn_norm.weight | 0x5dfe7400 | 0x4000 | +| 191 | blk.20.ffn_up.weight | 0x5dfeb400 | 0x1004000 | +| 192 | blk.21.attn_k.weight | 0x5efef400 | 
0x128000 | +| 193 | blk.21.attn_norm.weight | 0x5f117400 | 0x4000 | +| 194 | blk.21.attn_output.weight | 0x5f11b400 | 0x6e0000 | +| 195 | blk.21.attn_q.weight | 0x5f7fb400 | 0x520000 | +| 196 | blk.21.attn_v.weight | 0x5fd1b400 | 0x150000 | +| 197 | blk.21.ffn_down.weight | 0x5fe6b400 | 0x1004000 | +| 198 | blk.21.ffn_gate.weight | 0x60e6f400 | 0xce4000 | +| 199 | blk.21.ffn_norm.weight | 0x61b53400 | 0x4000 | +| 200 | blk.21.ffn_up.weight | 0x61b57400 | 0x1004000 | +| 201 | blk.22.attn_k.weight | 0x62b5b400 | 0x128000 | +| 202 | blk.22.attn_norm.weight | 0x62c83400 | 0x4000 | +| 203 | blk.22.attn_output.weight | 0x62c87400 | 0x6e0000 | +| 204 | blk.22.attn_q.weight | 0x63367400 | 0x520000 | +| 205 | blk.22.attn_v.weight | 0x63887400 | 0x150000 | +| 206 | blk.22.ffn_down.weight | 0x639d7400 | 0x1004000 | +| 207 | blk.22.ffn_gate.weight | 0x649db400 | 0xce4000 | +| 208 | blk.22.ffn_norm.weight | 0x656bf400 | 0x4000 | +| 209 | blk.22.ffn_up.weight | 0x656c3400 | 0x1004000 | +| 210 | blk.23.attn_k.weight | 0x666c7400 | 0x128000 | +| 211 | blk.23.attn_norm.weight | 0x667ef400 | 0x4000 | +| 212 | blk.23.attn_output.weight | 0x667f3400 | 0x6e0000 | +| 213 | blk.23.attn_q.weight | 0x66ed3400 | 0x520000 | +| 214 | blk.23.attn_v.weight | 0x673f3400 | 0x150000 | +| 215 | blk.23.ffn_down.weight | 0x67543400 | 0x1004000 | +| 216 | blk.23.ffn_gate.weight | 0x68547400 | 0xce4000 | +| 217 | blk.23.ffn_norm.weight | 0x6922b400 | 0x4000 | +| 218 | blk.23.ffn_up.weight | 0x6922f400 | 0xe74000 | +| 219 | blk.24.attn_k.weight | 0x6a0a3400 | 0x128000 | +| 220 | blk.24.attn_norm.weight | 0x6a1cb400 | 0x4000 | +| 221 | blk.24.attn_output.weight | 0x6a1cf400 | 0x6e0000 | +| 222 | blk.24.attn_q.weight | 0x6a8af400 | 0x4a0000 | +| 223 | blk.24.attn_v.weight | 0x6ad4f400 | 0x150000 | +| 224 | blk.24.ffn_down.weight | 0x6ae9f400 | 0x1004000 | +| 225 | blk.24.ffn_gate.weight | 0x6bea3400 | 0xce4000 | +| 226 | blk.24.ffn_norm.weight | 0x6cb87400 | 0x4000 | +| 227 | blk.24.ffn_up.weight | 
0x6cb8b400 | 0xe74000 | +| 228 | blk.25.attn_k.weight | 0x6d9ff400 | 0x128000 | +| 229 | blk.25.attn_norm.weight | 0x6db27400 | 0x4000 | +| 230 | blk.25.attn_output.weight | 0x6db2b400 | 0x6e0000 | +| 231 | blk.25.attn_q.weight | 0x6e20b400 | 0x520000 | +| 232 | blk.25.attn_v.weight | 0x6e72b400 | 0x150000 | +| 233 | blk.25.ffn_down.weight | 0x6e87b400 | 0x1004000 | +| 234 | blk.25.ffn_gate.weight | 0x6f87f400 | 0xce4000 | +| 235 | blk.25.ffn_norm.weight | 0x70563400 | 0x4000 | +| 236 | blk.25.ffn_up.weight | 0x70567400 | 0x1004000 | +| 237 | blk.26.attn_k.weight | 0x7156b400 | 0x128000 | +| 238 | blk.26.attn_norm.weight | 0x71693400 | 0x4000 | +| 239 | blk.26.attn_output.weight | 0x71697400 | 0x6e0000 | +| 240 | blk.26.attn_q.weight | 0x71d77400 | 0x520000 | +| 241 | blk.26.attn_v.weight | 0x72297400 | 0x150000 | +| 242 | blk.26.ffn_down.weight | 0x723e7400 | 0x1004000 | +| 243 | blk.26.ffn_gate.weight | 0x733eb400 | 0xce4000 | +| 244 | blk.26.ffn_norm.weight | 0x740cf400 | 0x4000 | +| 245 | blk.26.ffn_up.weight | 0x740d3400 | 0x1004000 | +| 246 | blk.27.attn_k.weight | 0x750d7400 | 0x148000 | +| 247 | blk.27.attn_norm.weight | 0x7521f400 | 0x4000 | +| 248 | blk.27.attn_output.weight | 0x75223400 | 0x6e0000 | +| 249 | blk.27.attn_q.weight | 0x75903400 | 0x520000 | +| 250 | blk.27.attn_v.weight | 0x75e23400 | 0x150000 | +| 251 | blk.27.ffn_down.weight | 0x75f73400 | 0x1004000 | +| 252 | blk.27.ffn_gate.weight | 0x76f77400 | 0xce4000 | +| 253 | blk.27.ffn_norm.weight | 0x77c5b400 | 0x4000 | +| 254 | blk.27.ffn_up.weight | 0x77c5f400 | 0xe74000 | +| 255 | blk.28.attn_k.weight | 0x78ad3400 | 0x148000 | +| 256 | blk.28.attn_norm.weight | 0x78c1b400 | 0x4000 | +| 257 | blk.28.attn_output.weight | 0x78c1f400 | 0x6e0000 | +| 258 | blk.28.attn_q.weight | 0x792ff400 | 0x520000 | +| 259 | blk.28.attn_v.weight | 0x7981f400 | 0x150000 | +| 260 | blk.28.ffn_down.weight | 0x7996f400 | 0x1004000 | +| 261 | blk.28.ffn_gate.weight | 0x7a973400 | 0xce4000 | +| 262 | 
blk.28.ffn_norm.weight | 0x7b657400 | 0x4000 | +| 263 | blk.28.ffn_up.weight | 0x7b65b400 | 0x1004000 | +| 264 | blk.29.attn_k.weight | 0x7c65f400 | 0x128000 | +| 265 | blk.29.attn_norm.weight | 0x7c787400 | 0x4000 | +| 266 | blk.29.attn_output.weight | 0x7c78b400 | 0x6e0000 | +| 267 | blk.29.attn_q.weight | 0x7ce6b400 | 0x520000 | +| 268 | blk.29.attn_v.weight | 0x7d38b400 | 0x150000 | +| 269 | blk.29.ffn_down.weight | 0x7d4db400 | 0x1004000 | +| 270 | blk.29.ffn_gate.weight | 0x7e4df400 | 0xce4000 | +| 271 | blk.29.ffn_norm.weight | 0x7f1c3400 | 0x4000 | +| 272 | blk.29.ffn_up.weight | 0x7f1c7400 | 0x1004000 | +| 273 | blk.30.attn_k.weight | 0x801cb400 | 0x148000 | +| 274 | blk.30.attn_norm.weight | 0x80313400 | 0x4000 | +| 275 | blk.30.attn_output.weight | 0x80317400 | 0x6e0000 | +| 276 | blk.30.attn_q.weight | 0x809f7400 | 0x520000 | +| 277 | blk.30.attn_v.weight | 0x80f17400 | 0x188000 | +| 278 | blk.30.ffn_down.weight | 0x8109f400 | 0x1004000 | +| 279 | blk.30.ffn_gate.weight | 0x820a3400 | 0xce4000 | +| 280 | blk.30.ffn_norm.weight | 0x82d87400 | 0x4000 | +| 281 | blk.30.ffn_up.weight | 0x82d8b400 | 0xe74000 | +| 282 | blk.31.attn_k.weight | 0x83bff400 | 0x128000 | +| 283 | blk.31.attn_norm.weight | 0x83d27400 | 0x4000 | +| 284 | blk.31.attn_output.weight | 0x83d2b400 | 0x6e0000 | +| 285 | blk.31.attn_q.weight | 0x8440b400 | 0x520000 | +| 286 | blk.31.attn_v.weight | 0x8492b400 | 0x148000 | +| 287 | blk.31.ffn_down.weight | 0x84a73400 | 0x1004000 | +| 288 | blk.31.ffn_gate.weight | 0x85a77400 | 0xce4000 | +| 289 | blk.31.ffn_norm.weight | 0x8675b400 | 0x4000 | +| 290 | blk.31.ffn_up.weight | 0x8675f400 | 0x1004000 | +| 291 | blk.32.attn_k.weight | 0x87763400 | 0x128000 | +| 292 | blk.32.attn_norm.weight | 0x8788b400 | 0x4000 | +| 293 | blk.32.attn_output.weight | 0x8788f400 | 0x6e0000 | +| 294 | blk.32.attn_q.weight | 0x87f6f400 | 0x520000 | +| 295 | blk.32.attn_v.weight | 0x8848f400 | 0x150000 | +| 296 | blk.32.ffn_down.weight | 0x885df400 | 0x1004000 | +| 
297 | blk.32.ffn_gate.weight | 0x895e3400 | 0xce4000 | +| 298 | blk.32.ffn_norm.weight | 0x8a2c7400 | 0x4000 | +| 299 | blk.32.ffn_up.weight | 0x8a2cb400 | 0xe74000 | +| 300 | blk.33.attn_k.weight | 0x8b13f400 | 0x128000 | +| 301 | blk.33.attn_norm.weight | 0x8b267400 | 0x4000 | +| 302 | blk.33.attn_output.weight | 0x8b26b400 | 0x6e0000 | +| 303 | blk.33.attn_q.weight | 0x8b94b400 | 0x520000 | +| 304 | blk.33.attn_v.weight | 0x8be6b400 | 0x150000 | +| 305 | blk.33.ffn_down.weight | 0x8bfbb400 | 0xe74000 | +| 306 | blk.33.ffn_gate.weight | 0x8ce2f400 | 0xce4000 | +| 307 | blk.33.ffn_norm.weight | 0x8db13400 | 0x4000 | +| 308 | blk.33.ffn_up.weight | 0x8db17400 | 0xe74000 | +| 309 | blk.34.attn_k.weight | 0x8e98b400 | 0x128000 | +| 310 | blk.34.attn_norm.weight | 0x8eab3400 | 0x4000 | +| 311 | blk.34.attn_output.weight | 0x8eab7400 | 0x6e0000 | +| 312 | blk.34.attn_q.weight | 0x8f197400 | 0x520000 | +| 313 | blk.34.attn_v.weight | 0x8f6b7400 | 0x150000 | +| 314 | blk.34.ffn_down.weight | 0x8f807400 | 0xe74000 | +| 315 | blk.34.ffn_gate.weight | 0x9067b400 | 0xce4000 | +| 316 | blk.34.ffn_norm.weight | 0x9135f400 | 0x4000 | +| 317 | blk.34.ffn_up.weight | 0x91363400 | 0xce4000 | +| 318 | blk.35.attn_k.weight | 0x92047400 | 0x128000 | +| 319 | blk.35.attn_norm.weight | 0x9216f400 | 0x4000 | +| 320 | blk.35.attn_output.weight | 0x92173400 | 0x6e0000 | +| 321 | blk.35.attn_q.weight | 0x92853400 | 0x520000 | +| 322 | blk.35.attn_v.weight | 0x92d73400 | 0x150000 | +| 323 | blk.35.ffn_down.weight | 0x92ec3400 | 0xe74000 | +| 324 | blk.35.ffn_gate.weight | 0x93d37400 | 0xce4000 | +| 325 | blk.35.ffn_norm.weight | 0x94a1b400 | 0x4000 | +| 326 | blk.35.ffn_up.weight | 0x94a1f400 | 0xe74000 | +| 327 | blk.36.attn_k.weight | 0x95893400 | 0x128000 | +| 328 | blk.36.attn_norm.weight | 0x959bb400 | 0x4000 | +| 329 | blk.36.attn_output.weight | 0x959bf400 | 0x6e0000 | +| 330 | blk.36.attn_q.weight | 0x9609f400 | 0x4a0000 | +| 331 | blk.36.attn_v.weight | 0x9653f400 | 0x150000 | +| 
332 | blk.36.ffn_down.weight | 0x9668f400 | 0x1004000 | +| 333 | blk.36.ffn_gate.weight | 0x97693400 | 0xce4000 | +| 334 | blk.36.ffn_norm.weight | 0x98377400 | 0x4000 | +| 335 | blk.36.ffn_up.weight | 0x9837b400 | 0xe74000 | +| 336 | blk.37.attn_k.weight | 0x991ef400 | 0x128000 | +| 337 | blk.37.attn_norm.weight | 0x99317400 | 0x4000 | +| 338 | blk.37.attn_output.weight | 0x9931b400 | 0x6e0000 | +| 339 | blk.37.attn_q.weight | 0x999fb400 | 0x520000 | +| 340 | blk.37.attn_v.weight | 0x99f1b400 | 0x150000 | +| 341 | blk.37.ffn_down.weight | 0x9a06b400 | 0x1004000 | +| 342 | blk.37.ffn_gate.weight | 0x9b06f400 | 0xce4000 | +| 343 | blk.37.ffn_norm.weight | 0x9bd53400 | 0x4000 | +| 344 | blk.37.ffn_up.weight | 0x9bd57400 | 0xe74000 | +| 345 | blk.38.attn_k.weight | 0x9cbcb400 | 0x128000 | +| 346 | blk.38.attn_norm.weight | 0x9ccf3400 | 0x4000 | +| 347 | blk.38.attn_output.weight | 0x9ccf7400 | 0x6e0000 | +| 348 | blk.38.attn_q.weight | 0x9d3d7400 | 0x520000 | +| 349 | blk.38.attn_v.weight | 0x9d8f7400 | 0x150000 | +| 350 | blk.38.ffn_down.weight | 0x9da47400 | 0x1004000 | +| 351 | blk.38.ffn_gate.weight | 0x9ea4b400 | 0xce4000 | +| 352 | blk.38.ffn_norm.weight | 0x9f72f400 | 0x4000 | +| 353 | blk.38.ffn_up.weight | 0x9f733400 | 0xe74000 | +| 354 | blk.39.attn_k.weight | 0xa05a7400 | 0x128000 | +| 355 | blk.39.attn_norm.weight | 0xa06cf400 | 0x4000 | +| 356 | blk.39.attn_output.weight | 0xa06d3400 | 0x6e0000 | +| 357 | blk.39.attn_q.weight | 0xa0db3400 | 0x520000 | +| 358 | blk.39.attn_v.weight | 0xa12d3400 | 0x148000 | +| 359 | blk.39.ffn_down.weight | 0xa141b400 | 0xe74000 | +| 360 | blk.39.ffn_gate.weight | 0xa228f400 | 0xe74000 | +| 361 | blk.39.ffn_norm.weight | 0xa3103400 | 0x4000 | +| 362 | blk.39.ffn_up.weight | 0xa3107400 | 0xe74000 | + +### Base Tensor Group : ~822M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------|:---------------------------------|:------------------|:----------------------|:-----|--------:| +| 0 | output.weight | Output (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q3_K | 3.4375 | +| 1 | output_norm.weight | Output Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 2 | token_embd.weight | Token Embedding (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q2_K | 2.6250 | + +- Total elements in base: (~822M) 822087680 +- Percentage of total elements: 9.35% +- Bits per Weight (BPW) for base: 3.0314 bits + + +### Block 0 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 3 | blk.0.attn_k.weight | Block 0 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_M | 1.7500 | +| 4 | blk.0.attn_norm.weight | Block 0 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 5 | blk.0.attn_output.weight | Block 0 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q2_K | 2.6250 | +| 6 | blk.0.attn_q.weight | Block 0 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 7 | blk.0.attn_v.weight | Block 0 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ1_M | 1.7500 | +| 8 | blk.0.ffn_down.weight | Block 0 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ1_M | 1.7500 | +| 9 | blk.0.ffn_gate.weight | Block 0 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 10 | blk.0.ffn_norm.weight | Block 0 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 11 | blk.0.ffn_up.weight | Block 0 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.0: 
(~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.0: 1.9236 bits + + +### Block 1 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 12 | blk.1.attn_k.weight | Block 1 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 13 | blk.1.attn_norm.weight | Block 1 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 14 | blk.1.attn_output.weight | Block 1 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 15 | blk.1.attn_q.weight | Block 1 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 16 | blk.1.attn_v.weight | Block 1 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_XXS | 3.0625 | +| 17 | blk.1.ffn_down.weight | Block 1 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 18 | blk.1.ffn_gate.weight | Block 1 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 19 | blk.1.ffn_norm.weight | Block 1 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 20 | blk.1.ffn_up.weight | Block 1 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.1: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.1: 2.4900 bits + + +### Block 2 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 21 | blk.2.attn_k.weight | Block 2 Attention Key 
(W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 22 | blk.2.attn_norm.weight | Block 2 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 23 | blk.2.attn_output.weight | Block 2 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 24 | blk.2.attn_q.weight | Block 2 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 25 | blk.2.attn_v.weight | Block 2 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_XXS | 3.0625 | +| 26 | blk.2.ffn_down.weight | Block 2 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 27 | blk.2.ffn_gate.weight | Block 2 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 28 | blk.2.ffn_norm.weight | Block 2 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 29 | blk.2.ffn_up.weight | Block 2 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.2: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.2: 2.5111 bits + + +### Block 3 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 30 | blk.3.attn_k.weight | Block 3 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XXS | 2.0625 | +| 31 | blk.3.attn_norm.weight | Block 3 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 32 | blk.3.attn_output.weight | Block 3 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 33 | blk.3.attn_q.weight | Block 3 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 34 | blk.3.attn_v.weight 
| Block 3 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_XXS | 3.0625 | +| 35 | blk.3.ffn_down.weight | Block 3 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_XXS | 2.0625 | +| 36 | blk.3.ffn_gate.weight | Block 3 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 37 | blk.3.ffn_norm.weight | Block 3 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 38 | blk.3.ffn_up.weight | Block 3 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.3: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.3: 2.2874 bits + + +### Block 4 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 39 | blk.4.attn_k.weight | Block 4 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 40 | blk.4.attn_norm.weight | Block 4 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 41 | blk.4.attn_output.weight | Block 4 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 42 | blk.4.attn_q.weight | Block 4 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 43 | blk.4.attn_v.weight | Block 4 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_XXS | 3.0625 | +| 44 | blk.4.ffn_down.weight | Block 4 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 45 | blk.4.ffn_gate.weight | Block 4 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 46 | blk.4.ffn_norm.weight | Block 4 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 
4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 47 | blk.4.ffn_up.weight | Block 4 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | + +- Total elements in blk.4: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.4: 2.3795 bits + + +### Block 5 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 48 | blk.5.attn_k.weight | Block 5 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 49 | blk.5.attn_norm.weight | Block 5 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 50 | blk.5.attn_output.weight | Block 5 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 51 | blk.5.attn_q.weight | Block 5 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 52 | blk.5.attn_v.weight | Block 5 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 53 | blk.5.ffn_down.weight | Block 5 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q2_K | 2.6250 | +| 54 | blk.5.ffn_gate.weight | Block 5 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 55 | blk.5.ffn_norm.weight | Block 5 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 56 | blk.5.ffn_up.weight | Block 5 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.5: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.5: 2.4525 bits + + +### Block 6 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 57 | blk.6.attn_k.weight | Block 6 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 58 | blk.6.attn_norm.weight | Block 6 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 59 | blk.6.attn_output.weight | Block 6 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 60 | blk.6.attn_q.weight | Block 6 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 61 | blk.6.attn_v.weight | Block 6 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 62 | blk.6.ffn_down.weight | Block 6 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 63 | blk.6.ffn_gate.weight | Block 6 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 64 | blk.6.ffn_norm.weight | Block 6 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 65 | blk.6.ffn_up.weight | Block 6 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.6: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.6: 2.4150 bits + + +### Block 7 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 66 | blk.7.attn_k.weight | Block 7 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_S | 2.5625 | +| 67 | blk.7.attn_norm.weight | Block 7 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 68 | blk.7.attn_output.weight | Block 7 Attention Output (W) | (~17M) 
16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 69 | blk.7.attn_q.weight | Block 7 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 70 | blk.7.attn_v.weight | Block 7 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 71 | blk.7.ffn_down.weight | Block 7 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 72 | blk.7.ffn_gate.weight | Block 7 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 73 | blk.7.ffn_norm.weight | Block 7 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 74 | blk.7.ffn_up.weight | Block 7 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.7: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.7: 2.5071 bits + + +### Block 8 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 75 | blk.8.attn_k.weight | Block 8 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 76 | blk.8.attn_norm.weight | Block 8 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 77 | blk.8.attn_output.weight | Block 8 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 78 | blk.8.attn_q.weight | Block 8 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 79 | blk.8.attn_v.weight | Block 8 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 80 | blk.8.ffn_down.weight | Block 8 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 81 | blk.8.ffn_gate.weight | Block 8 
Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 82 | blk.8.ffn_norm.weight | Block 8 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 83 | blk.8.ffn_up.weight | Block 8 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.8: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.8: 2.5019 bits + + +### Block 9 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 84 | blk.9.attn_k.weight | Block 9 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 85 | blk.9.attn_norm.weight | Block 9 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 86 | blk.9.attn_output.weight | Block 9 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 87 | blk.9.attn_q.weight | Block 9 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 88 | blk.9.attn_v.weight | Block 9 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 89 | blk.9.ffn_down.weight | Block 9 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 90 | blk.9.ffn_gate.weight | Block 9 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 91 | blk.9.ffn_norm.weight | Block 9 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 92 | blk.9.ffn_up.weight | Block 9 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.9: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per 
Weight (BPW) for blk.9: 2.5019 bits + + +### Block 10 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 93 | blk.10.attn_k.weight | Block 10 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 94 | blk.10.attn_norm.weight | Block 10 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 95 | blk.10.attn_output.weight | Block 10 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 96 | blk.10.attn_q.weight | Block 10 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 97 | blk.10.attn_v.weight | Block 10 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 98 | blk.10.ffn_down.weight | Block 10 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 99 | blk.10.ffn_gate.weight | Block 10 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 100 | blk.10.ffn_norm.weight | Block 10 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 101 | blk.10.ffn_up.weight | Block 10 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.10: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.10: 2.5019 bits + + +### Block 11 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 102 | blk.11.attn_k.weight | Block 11 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | 
IQ2_XS | 2.3125 | +| 103 | blk.11.attn_norm.weight | Block 11 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 104 | blk.11.attn_output.weight | Block 11 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 105 | blk.11.attn_q.weight | Block 11 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 106 | blk.11.attn_v.weight | Block 11 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 107 | blk.11.ffn_down.weight | Block 11 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 108 | blk.11.ffn_gate.weight | Block 11 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ1_S | 1.5625 | +| 109 | blk.11.ffn_norm.weight | Block 11 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 110 | blk.11.ffn_up.weight | Block 11 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.11: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.11: 2.3703 bits + + +### Block 12 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 111 | blk.12.attn_k.weight | Block 12 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 112 | blk.12.attn_norm.weight | Block 12 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 113 | blk.12.attn_output.weight | Block 12 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 114 | blk.12.attn_q.weight | Block 12 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 115 | blk.12.attn_v.weight | Block 
12 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 116 | blk.12.ffn_down.weight | Block 12 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 117 | blk.12.ffn_gate.weight | Block 12 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 118 | blk.12.ffn_norm.weight | Block 12 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 119 | blk.12.ffn_up.weight | Block 12 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.12: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.12: 2.5019 bits + + +### Block 13 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 120 | blk.13.attn_k.weight | Block 13 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 121 | blk.13.attn_norm.weight | Block 13 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 122 | blk.13.attn_output.weight | Block 13 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 123 | blk.13.attn_q.weight | Block 13 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 124 | blk.13.attn_v.weight | Block 13 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 125 | blk.13.ffn_down.weight | Block 13 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 126 | blk.13.ffn_gate.weight | Block 13 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 127 | blk.13.ffn_norm.weight | Block 13 Feed-Forward Network 
Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 128 | blk.13.ffn_up.weight | Block 13 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.13: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.13: 2.5019 bits + + +### Block 14 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 129 | blk.14.attn_k.weight | Block 14 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 130 | blk.14.attn_norm.weight | Block 14 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 131 | blk.14.attn_output.weight | Block 14 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 132 | blk.14.attn_q.weight | Block 14 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 133 | blk.14.attn_v.weight | Block 14 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 134 | blk.14.ffn_down.weight | Block 14 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 135 | blk.14.ffn_gate.weight | Block 14 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 136 | blk.14.ffn_norm.weight | Block 14 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 137 | blk.14.ffn_up.weight | Block 14 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.14: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.14: 2.5019 bits + + +### Block 15 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human 
Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 138 | blk.15.attn_k.weight | Block 15 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_S | 2.5625 | +| 139 | blk.15.attn_norm.weight | Block 15 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 140 | blk.15.attn_output.weight | Block 15 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 141 | blk.15.attn_q.weight | Block 15 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 142 | blk.15.attn_v.weight | Block 15 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 143 | blk.15.ffn_down.weight | Block 15 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 144 | blk.15.ffn_gate.weight | Block 15 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 145 | blk.15.ffn_norm.weight | Block 15 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 146 | blk.15.ffn_up.weight | Block 15 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.15: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.15: 2.5071 bits + + +### Block 16 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 147 | blk.16.attn_k.weight | Block 16 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_S | 2.5625 | +| 148 | blk.16.attn_norm.weight | Block 16 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 
x 1 x 1 | F32 | 32.0000 | +| 149 | blk.16.attn_output.weight | Block 16 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_XXS | 3.0625 | +| 150 | blk.16.attn_q.weight | Block 16 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 151 | blk.16.attn_v.weight | Block 16 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 152 | blk.16.ffn_down.weight | Block 16 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 153 | blk.16.ffn_gate.weight | Block 16 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 154 | blk.16.ffn_norm.weight | Block 16 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 155 | blk.16.ffn_up.weight | Block 16 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.16: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.16: 2.4756 bits + + +### Block 17 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 156 | blk.17.attn_k.weight | Block 17 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 157 | blk.17.attn_norm.weight | Block 17 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 158 | blk.17.attn_output.weight | Block 17 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 159 | blk.17.attn_q.weight | Block 17 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 160 | blk.17.attn_v.weight | Block 17 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 161 | blk.17.ffn_down.weight 
| Block 17 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 162 | blk.17.ffn_gate.weight | Block 17 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 163 | blk.17.ffn_norm.weight | Block 17 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 164 | blk.17.ffn_up.weight | Block 17 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.17: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.17: 2.5019 bits + + +### Block 18 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 165 | blk.18.attn_k.weight | Block 18 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 166 | blk.18.attn_norm.weight | Block 18 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 167 | blk.18.attn_output.weight | Block 18 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 168 | blk.18.attn_q.weight | Block 18 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 169 | blk.18.attn_v.weight | Block 18 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 170 | blk.18.ffn_down.weight | Block 18 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 171 | blk.18.ffn_gate.weight | Block 18 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 172 | blk.18.ffn_norm.weight | Block 18 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 173 | blk.18.ffn_up.weight | Block 18 
Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.18: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.18: 2.5019 bits + + +### Block 19 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 174 | blk.19.attn_k.weight | Block 19 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 175 | blk.19.attn_norm.weight | Block 19 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 176 | blk.19.attn_output.weight | Block 19 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 177 | blk.19.attn_q.weight | Block 19 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 178 | blk.19.attn_v.weight | Block 19 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 179 | blk.19.ffn_down.weight | Block 19 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 180 | blk.19.ffn_gate.weight | Block 19 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 181 | blk.19.ffn_norm.weight | Block 19 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 182 | blk.19.ffn_up.weight | Block 19 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.19: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.19: 2.5019 bits + + +### Block 20 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 183 | blk.20.attn_k.weight | Block 20 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_S | 2.5625 | +| 184 | blk.20.attn_norm.weight | Block 20 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 185 | blk.20.attn_output.weight | Block 20 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 186 | blk.20.attn_q.weight | Block 20 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 187 | blk.20.attn_v.weight | Block 20 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 188 | blk.20.ffn_down.weight | Block 20 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 189 | blk.20.ffn_gate.weight | Block 20 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 190 | blk.20.ffn_norm.weight | Block 20 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 191 | blk.20.ffn_up.weight | Block 20 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.20: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.20: 2.5071 bits + + +### Block 21 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 192 | blk.21.attn_k.weight | Block 21 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 193 | blk.21.attn_norm.weight | Block 21 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 194 | blk.21.attn_output.weight 
| Block 21 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 195 | blk.21.attn_q.weight | Block 21 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 196 | blk.21.attn_v.weight | Block 21 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 197 | blk.21.ffn_down.weight | Block 21 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 198 | blk.21.ffn_gate.weight | Block 21 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 199 | blk.21.ffn_norm.weight | Block 21 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 200 | blk.21.ffn_up.weight | Block 21 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.21: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.21: 2.5019 bits + + +### Block 22 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 201 | blk.22.attn_k.weight | Block 22 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 202 | blk.22.attn_norm.weight | Block 22 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 203 | blk.22.attn_output.weight | Block 22 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 204 | blk.22.attn_q.weight | Block 22 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 205 | blk.22.attn_v.weight | Block 22 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 206 | blk.22.ffn_down.weight | Block 22 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 
12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 207 | blk.22.ffn_gate.weight | Block 22 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 208 | blk.22.ffn_norm.weight | Block 22 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 209 | blk.22.ffn_up.weight | Block 22 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.22: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.22: 2.5019 bits + + +### Block 23 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 210 | blk.23.attn_k.weight | Block 23 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 211 | blk.23.attn_norm.weight | Block 23 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 212 | blk.23.attn_output.weight | Block 23 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 213 | blk.23.attn_q.weight | Block 23 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 214 | blk.23.attn_v.weight | Block 23 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 215 | blk.23.ffn_down.weight | Block 23 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 216 | blk.23.ffn_gate.weight | Block 23 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 217 | blk.23.ffn_norm.weight | Block 23 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 218 | blk.23.ffn_up.weight | Block 23 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | 
IQ2_XS | 2.3125 | + +- Total elements in blk.23: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.23: 2.4361 bits + + +### Block 24 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 219 | blk.24.attn_k.weight | Block 24 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 220 | blk.24.attn_norm.weight | Block 24 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 221 | blk.24.attn_output.weight | Block 24 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 222 | blk.24.attn_q.weight | Block 24 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 223 | blk.24.attn_v.weight | Block 24 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 224 | blk.24.ffn_down.weight | Block 24 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 225 | blk.24.ffn_gate.weight | Block 24 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 226 | blk.24.ffn_norm.weight | Block 24 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 227 | blk.24.ffn_up.weight | Block 24 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.24: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.24: 2.4150 bits + + +### Block 25 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 228 | blk.25.attn_k.weight | Block 25 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 229 | blk.25.attn_norm.weight | Block 25 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 230 | blk.25.attn_output.weight | Block 25 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 231 | blk.25.attn_q.weight | Block 25 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 232 | blk.25.attn_v.weight | Block 25 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 233 | blk.25.ffn_down.weight | Block 25 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 234 | blk.25.ffn_gate.weight | Block 25 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 235 | blk.25.ffn_norm.weight | Block 25 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 236 | blk.25.ffn_up.weight | Block 25 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.25: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.25: 2.5019 bits + + +### Block 26 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 237 | blk.26.attn_k.weight | Block 26 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 238 | blk.26.attn_norm.weight | Block 26 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 239 | 
blk.26.attn_output.weight | Block 26 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 240 | blk.26.attn_q.weight | Block 26 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 241 | blk.26.attn_v.weight | Block 26 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 242 | blk.26.ffn_down.weight | Block 26 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 243 | blk.26.ffn_gate.weight | Block 26 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 244 | blk.26.ffn_norm.weight | Block 26 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 245 | blk.26.ffn_up.weight | Block 26 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.26: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.26: 2.5019 bits + + +### Block 27 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 246 | blk.27.attn_k.weight | Block 27 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_S | 2.5625 | +| 247 | blk.27.attn_norm.weight | Block 27 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 248 | blk.27.attn_output.weight | Block 27 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 249 | blk.27.attn_q.weight | Block 27 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 250 | blk.27.attn_v.weight | Block 27 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 251 | blk.27.ffn_down.weight | Block 27 Feed-Forward Network "Down" 
(W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 252 | blk.27.ffn_gate.weight | Block 27 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 253 | blk.27.ffn_norm.weight | Block 27 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 254 | blk.27.ffn_up.weight | Block 27 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.27: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.27: 2.4413 bits + + +### Block 28 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 255 | blk.28.attn_k.weight | Block 28 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_S | 2.5625 | +| 256 | blk.28.attn_norm.weight | Block 28 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 257 | blk.28.attn_output.weight | Block 28 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 258 | blk.28.attn_q.weight | Block 28 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 259 | blk.28.attn_v.weight | Block 28 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 260 | blk.28.ffn_down.weight | Block 28 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 261 | blk.28.ffn_gate.weight | Block 28 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 262 | blk.28.ffn_norm.weight | Block 28 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 263 | blk.28.ffn_up.weight | Block 28 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 
4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.28: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.28: 2.5071 bits + + +### Block 29 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 264 | blk.29.attn_k.weight | Block 29 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 265 | blk.29.attn_norm.weight | Block 29 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 266 | blk.29.attn_output.weight | Block 29 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 267 | blk.29.attn_q.weight | Block 29 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 268 | blk.29.attn_v.weight | Block 29 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 269 | blk.29.ffn_down.weight | Block 29 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 270 | blk.29.ffn_gate.weight | Block 29 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 271 | blk.29.ffn_norm.weight | Block 29 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 272 | blk.29.ffn_up.weight | Block 29 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.29: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.29: 2.5019 bits + + +### Block 30 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 273 | blk.30.attn_k.weight | Block 30 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_S | 2.5625 | +| 274 | blk.30.attn_norm.weight | Block 30 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 275 | blk.30.attn_output.weight | Block 30 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 276 | blk.30.attn_q.weight | Block 30 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 277 | blk.30.attn_v.weight | Block 30 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_XXS | 3.0625 | +| 278 | blk.30.ffn_down.weight | Block 30 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 279 | blk.30.ffn_gate.weight | Block 30 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 280 | blk.30.ffn_norm.weight | Block 30 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 281 | blk.30.ffn_up.weight | Block 30 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.30: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.30: 2.4506 bits + + +### Block 31 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 282 | blk.31.attn_k.weight | Block 31 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 283 | blk.31.attn_norm.weight | Block 31 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 284 | 
blk.31.attn_output.weight | Block 31 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 285 | blk.31.attn_q.weight | Block 31 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 286 | blk.31.attn_v.weight | Block 31 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_S | 2.5625 | +| 287 | blk.31.ffn_down.weight | Block 31 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 288 | blk.31.ffn_gate.weight | Block 31 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 289 | blk.31.ffn_norm.weight | Block 31 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 290 | blk.31.ffn_up.weight | Block 31 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_S | 2.5625 | + +- Total elements in blk.31: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.31: 2.5006 bits + + +### Block 32 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 291 | blk.32.attn_k.weight | Block 32 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 292 | blk.32.attn_norm.weight | Block 32 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 293 | blk.32.attn_output.weight | Block 32 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 294 | blk.32.attn_q.weight | Block 32 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 295 | blk.32.attn_v.weight | Block 32 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 296 | blk.32.ffn_down.weight | Block 32 Feed-Forward Network 
"Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 297 | blk.32.ffn_gate.weight | Block 32 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 298 | blk.32.ffn_norm.weight | Block 32 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 299 | blk.32.ffn_up.weight | Block 32 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.32: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.32: 2.4361 bits + + +### Block 33 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 300 | blk.33.attn_k.weight | Block 33 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 301 | blk.33.attn_norm.weight | Block 33 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 302 | blk.33.attn_output.weight | Block 33 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 303 | blk.33.attn_q.weight | Block 33 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 304 | blk.33.attn_v.weight | Block 33 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 305 | blk.33.ffn_down.weight | Block 33 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 306 | blk.33.ffn_gate.weight | Block 33 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 307 | blk.33.ffn_norm.weight | Block 33 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 308 | blk.33.ffn_up.weight | Block 33 Feed-Forward Network "Up" (W) | (~52M) 
52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.33: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.33: 2.3703 bits + + +### Block 34 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 309 | blk.34.attn_k.weight | Block 34 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 310 | blk.34.attn_norm.weight | Block 34 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 311 | blk.34.attn_output.weight | Block 34 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 312 | blk.34.attn_q.weight | Block 34 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 313 | blk.34.attn_v.weight | Block 34 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 314 | blk.34.ffn_down.weight | Block 34 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 315 | blk.34.ffn_gate.weight | Block 34 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 316 | blk.34.ffn_norm.weight | Block 34 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 317 | blk.34.ffn_up.weight | Block 34 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | + +- Total elements in blk.34: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.34: 2.3045 bits + + +### Block 35 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 318 | blk.35.attn_k.weight | Block 35 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 319 | blk.35.attn_norm.weight | Block 35 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 320 | blk.35.attn_output.weight | Block 35 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 321 | blk.35.attn_q.weight | Block 35 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 322 | blk.35.attn_v.weight | Block 35 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 323 | blk.35.ffn_down.weight | Block 35 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 324 | blk.35.ffn_gate.weight | Block 35 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 325 | blk.35.ffn_norm.weight | Block 35 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 326 | blk.35.ffn_up.weight | Block 35 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.35: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.35: 2.3703 bits + + +### Block 36 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 327 | blk.36.attn_k.weight | Block 36 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 328 | blk.36.attn_norm.weight | Block 36 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 329 | 
blk.36.attn_output.weight | Block 36 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 330 | blk.36.attn_q.weight | Block 36 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 331 | blk.36.attn_v.weight | Block 36 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 332 | blk.36.ffn_down.weight | Block 36 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 333 | blk.36.ffn_gate.weight | Block 36 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 334 | blk.36.ffn_norm.weight | Block 36 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 335 | blk.36.ffn_up.weight | Block 36 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.36: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.36: 2.4150 bits + + +### Block 37 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 336 | blk.37.attn_k.weight | Block 37 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 337 | blk.37.attn_norm.weight | Block 37 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 338 | blk.37.attn_output.weight | Block 37 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 339 | blk.37.attn_q.weight | Block 37 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 340 | blk.37.attn_v.weight | Block 37 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 341 | blk.37.ffn_down.weight | Block 37 Feed-Forward Network 
"Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 342 | blk.37.ffn_gate.weight | Block 37 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 343 | blk.37.ffn_norm.weight | Block 37 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 344 | blk.37.ffn_up.weight | Block 37 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.37: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.37: 2.4361 bits + + +### Block 38 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 345 | blk.38.attn_k.weight | Block 38 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 346 | blk.38.attn_norm.weight | Block 38 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 347 | blk.38.attn_output.weight | Block 38 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 348 | blk.38.attn_q.weight | Block 38 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 349 | blk.38.attn_v.weight | Block 38 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q2_K | 2.6250 | +| 350 | blk.38.ffn_down.weight | Block 38 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 351 | blk.38.ffn_gate.weight | Block 38 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 352 | blk.38.ffn_norm.weight | Block 38 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 353 | blk.38.ffn_up.weight | Block 38 Feed-Forward Network "Up" (W) | (~52M) 
52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.38: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.38: 2.4361 bits + + +### Block 39 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 354 | blk.39.attn_k.weight | Block 39 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 355 | blk.39.attn_norm.weight | Block 39 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 356 | blk.39.attn_output.weight | Block 39 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 357 | blk.39.attn_q.weight | Block 39 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_S | 2.5625 | +| 358 | blk.39.attn_v.weight | Block 39 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_S | 2.5625 | +| 359 | blk.39.ffn_down.weight | Block 39 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 360 | blk.39.ffn_gate.weight | Block 39 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | +| 361 | blk.39.ffn_norm.weight | Block 39 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 362 | blk.39.ffn_up.weight | Block 39 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XS | 2.3125 | + +- Total elements in blk.39: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.39: 2.4348 bits + + +Total BPW for granite-4.1-8b-Q2_K.gguf: 2.5000 bits diff --git a/scores/granite-4.1-8b-Q3_K.md b/scores/granite-4.1-8b-Q3_K.md new file mode 100644 index 0000000..7b6296a --- /dev/null +++ b/scores/granite-4.1-8b-Q3_K.md @@ -0,0 
+1,1247 @@ +# granite-4.1-8b-WIP/granite-4.1-8b-Q3_K.gguf - GGUF Internal File Dump + +- Endian: LITTLE endian + +## Key Value Metadata Store + +There are 42 key-value pairs in this file + +| POS | TYPE | Count | Key | Value | +|----:|:---------|-------:|:-----------------------------------------|:--------------------------------------------------------------------| +| 1 | UINT32 | 1 | GGUF.version | 3 | +| 2 | UINT64 | 1 | GGUF.tensor_count | 363 | +| 3 | UINT64 | 1 | GGUF.kv_count | 39 | +| 4 | STRING | 1 | general.architecture | `granite` | +| 5 | STRING | 1 | general.type | `model` | +| 6 | STRING | 1 | general.name | `Granite 4.1 8b` | +| 7 | STRING | 1 | general.basename | `granite-4.1` | +| 8 | STRING | 1 | general.size_label | `8B` | +| 9 | STRING | 1 | general.license | `apache-2.0` | +| 10 | [STRING] | 2 | general.tags | [ `language`, `granite-4.1` ] | +| 11 | UINT32 | 1 | granite.block_count | 40 | +| 12 | UINT32 | 1 | granite.context_length | 131072 | +| 13 | UINT32 | 1 | granite.embedding_length | 4096 | +| 14 | UINT32 | 1 | granite.feed_forward_length | 12800 | +| 15 | UINT32 | 1 | granite.attention.head_count | 32 | +| 16 | UINT32 | 1 | granite.attention.head_count_kv | 8 | +| 17 | FLOAT32 | 1 | granite.rope.freq_base | 10000000.0 | +| 18 | FLOAT32 | 1 | granite.attention.layer_norm_rms_epsilon | 1e-05 | +| 19 | UINT32 | 1 | granite.vocab_size | 100352 | +| 20 | UINT32 | 1 | granite.rope.dimension_count | 128 | +| 21 | FLOAT32 | 1 | granite.attention.scale | 0.0078125 | +| 22 | FLOAT32 | 1 | granite.embedding_scale | 12.0 | +| 23 | FLOAT32 | 1 | granite.residual_scale | 0.22 | +| 24 | FLOAT32 | 1 | granite.logit_scale | 16.0 | +| 25 | STRING | 1 | tokenizer.ggml.model | `gpt2` | +| 26 | STRING | 1 | tokenizer.ggml.pre | `granite-docling` | +| 27 | [STRING] | 100352 | tokenizer.ggml.tokens | [ `!`, `"`, `#`, `$`, `%`, ... ] | +| 28 | [INT32] | 100352 | tokenizer.ggml.token_type | [ 1, 1, 1, 1, 1, 1, 1, ... 
] | +| 29 | [STRING] | 100000 | tokenizer.ggml.merges | [ `Ġ Ġ`, `ĠĠ ĠĠ`, `i n`, `Ġ t`, `ĠĠĠĠ ĠĠĠĠ`, ... ] | +| 30 | UINT32 | 1 | tokenizer.ggml.bos_token_id | 100257 | +| 31 | UINT32 | 1 | tokenizer.ggml.eos_token_id | 100257 | +| 32 | UINT32 | 1 | tokenizer.ggml.unknown_token_id | 100269 | +| 33 | UINT32 | 1 | tokenizer.ggml.padding_token_id | 100256 | +| 34 | BOOL | 1 | tokenizer.ggml.add_bos_token | False | +| 35 | STRING | 1 | tokenizer.chat_template | `{%- set tools_system_message_p`...`end_of_role|>' }}{%- endif %}` | +| 36 | BOOL | 1 | tokenizer.ggml.add_space_prefix | False | +| 37 | UINT32 | 1 | general.quantization_version | 2 | +| 38 | UINT32 | 1 | general.file_type | 30 | +| 39 | STRING | 1 | quantize.imatrix.file | `imatrix/imatrix-granite-4.1-8b-medium.gguf` | +| 40 | STRING | 1 | quantize.imatrix.dataset | `../../datasets/imatrix/combined_all_medium.txt` | +| 41 | UINT32 | 1 | quantize.imatrix.entries_count | 280 | +| 42 | UINT32 | 1 | quantize.imatrix.chunks_count | 3601 | + +## Tensors Overview ~9B Elements + +Total number of elements in all tensors: 8791592960 Elements + +- [granite-4.1-8b-WIP/granite-4.1-8b-Q3\_K.gguf - GGUF Internal File Dump](#granite-41-8b-wipgranite-41-8b-q3_kgguf---gguf-internal-file-dump) + - [Key Value Metadata Store](#key-value-metadata-store) + - [Tensors Overview ~9B Elements](#tensors-overview-9b-elements) + - [Tensor Data Offset](#tensor-data-offset) + - [Base Tensor Group : ~822M Elements](#base-tensor-group--822m-elements) + - [Block 0 Tensor Group : ~199M Elements](#block-0-tensor-group--199m-elements) + - [Block 1 Tensor Group : ~199M Elements](#block-1-tensor-group--199m-elements) + - [Block 2 Tensor Group : ~199M Elements](#block-2-tensor-group--199m-elements) + - [Block 3 Tensor Group : ~199M Elements](#block-3-tensor-group--199m-elements) + - [Block 4 Tensor Group : ~199M Elements](#block-4-tensor-group--199m-elements) + - [Block 5 Tensor Group : ~199M Elements](#block-5-tensor-group--199m-elements) + - 
[Block 6 Tensor Group : ~199M Elements](#block-6-tensor-group--199m-elements) + - [Block 7 Tensor Group : ~199M Elements](#block-7-tensor-group--199m-elements) + - [Block 8 Tensor Group : ~199M Elements](#block-8-tensor-group--199m-elements) + - [Block 9 Tensor Group : ~199M Elements](#block-9-tensor-group--199m-elements) + - [Block 10 Tensor Group : ~199M Elements](#block-10-tensor-group--199m-elements) + - [Block 11 Tensor Group : ~199M Elements](#block-11-tensor-group--199m-elements) + - [Block 12 Tensor Group : ~199M Elements](#block-12-tensor-group--199m-elements) + - [Block 13 Tensor Group : ~199M Elements](#block-13-tensor-group--199m-elements) + - [Block 14 Tensor Group : ~199M Elements](#block-14-tensor-group--199m-elements) + - [Block 15 Tensor Group : ~199M Elements](#block-15-tensor-group--199m-elements) + - [Block 16 Tensor Group : ~199M Elements](#block-16-tensor-group--199m-elements) + - [Block 17 Tensor Group : ~199M Elements](#block-17-tensor-group--199m-elements) + - [Block 18 Tensor Group : ~199M Elements](#block-18-tensor-group--199m-elements) + - [Block 19 Tensor Group : ~199M Elements](#block-19-tensor-group--199m-elements) + - [Block 20 Tensor Group : ~199M Elements](#block-20-tensor-group--199m-elements) + - [Block 21 Tensor Group : ~199M Elements](#block-21-tensor-group--199m-elements) + - [Block 22 Tensor Group : ~199M Elements](#block-22-tensor-group--199m-elements) + - [Block 23 Tensor Group : ~199M Elements](#block-23-tensor-group--199m-elements) + - [Block 24 Tensor Group : ~199M Elements](#block-24-tensor-group--199m-elements) + - [Block 25 Tensor Group : ~199M Elements](#block-25-tensor-group--199m-elements) + - [Block 26 Tensor Group : ~199M Elements](#block-26-tensor-group--199m-elements) + - [Block 27 Tensor Group : ~199M Elements](#block-27-tensor-group--199m-elements) + - [Block 28 Tensor Group : ~199M Elements](#block-28-tensor-group--199m-elements) + - [Block 29 Tensor Group : ~199M 
Elements](#block-29-tensor-group--199m-elements) + - [Block 30 Tensor Group : ~199M Elements](#block-30-tensor-group--199m-elements) + - [Block 31 Tensor Group : ~199M Elements](#block-31-tensor-group--199m-elements) + - [Block 32 Tensor Group : ~199M Elements](#block-32-tensor-group--199m-elements) + - [Block 33 Tensor Group : ~199M Elements](#block-33-tensor-group--199m-elements) + - [Block 34 Tensor Group : ~199M Elements](#block-34-tensor-group--199m-elements) + - [Block 35 Tensor Group : ~199M Elements](#block-35-tensor-group--199m-elements) + - [Block 36 Tensor Group : ~199M Elements](#block-36-tensor-group--199m-elements) + - [Block 37 Tensor Group : ~199M Elements](#block-37-tensor-group--199m-elements) + - [Block 38 Tensor Group : ~199M Elements](#block-38-tensor-group--199m-elements) + - [Block 39 Tensor Group : ~199M Elements](#block-39-tensor-group--199m-elements) + +### Tensor Data Offset + +This table contains the offset and data segment relative to start of file + +| T_ID | Tensor Layer Name | Data Offset (B) | Data Size (B) | +|-----:|:--------------------------|-----------------:|-----------------:| +| 0 | output.weight | 0x367400 | 0xdc80000 | +| 1 | output_norm.weight | 0xdfe7400 | 0x4000 | +| 2 | token_embd.weight | 0xdfeb400 | 0xa870000 | +| 3 | blk.0.attn_k.weight | 0x1885b400 | 0x128000 | +| 4 | blk.0.attn_norm.weight | 0x18983400 | 0x4000 | +| 5 | blk.0.attn_output.weight | 0x18987400 | 0x620000 | +| 6 | blk.0.attn_q.weight | 0x18fa7400 | 0x4a0000 | +| 7 | blk.0.attn_v.weight | 0x19447400 | 0x188000 | +| 8 | blk.0.ffn_down.weight | 0x195cf400 | 0x1068000 | +| 9 | blk.0.ffn_gate.weight | 0x1a637400 | 0xce4000 | +| 10 | blk.0.ffn_norm.weight | 0x1b31b400 | 0x4000 | +| 11 | blk.0.ffn_up.weight | 0x1b31f400 | 0x1324000 | +| 12 | blk.1.attn_k.weight | 0x1c643400 | 0x188000 | +| 13 | blk.1.attn_norm.weight | 0x1c7cb400 | 0x4000 | +| 14 | blk.1.attn_output.weight | 0x1c7cf400 | 0x880000 | +| 15 | blk.1.attn_q.weight | 0x1d04f400 | 0x6e0000 | +| 16 
| blk.1.attn_v.weight | 0x1d72f400 | 0x240000 | +| 17 | blk.1.ffn_down.weight | 0x1d96f400 | 0x157c000 | +| 18 | blk.1.ffn_gate.weight | 0x1eeeb400 | 0x1324000 | +| 19 | blk.1.ffn_norm.weight | 0x2020f400 | 0x4000 | +| 20 | blk.1.ffn_up.weight | 0x20213400 | 0x1a90000 | +| 21 | blk.2.attn_k.weight | 0x21ca3400 | 0x1b8000 | +| 22 | blk.2.attn_norm.weight | 0x21e5b400 | 0x4000 | +| 23 | blk.2.attn_output.weight | 0x21e5f400 | 0x880000 | +| 24 | blk.2.attn_q.weight | 0x226df400 | 0x6e0000 | +| 25 | blk.2.attn_v.weight | 0x22dbf400 | 0x220000 | +| 26 | blk.2.ffn_down.weight | 0x22fdf400 | 0x157c000 | +| 27 | blk.2.ffn_gate.weight | 0x2455b400 | 0x1324000 | +| 28 | blk.2.ffn_norm.weight | 0x2587f400 | 0x4000 | +| 29 | blk.2.ffn_up.weight | 0x25883400 | 0x1a90000 | +| 30 | blk.3.attn_k.weight | 0x27313400 | 0x188000 | +| 31 | blk.3.attn_norm.weight | 0x2749b400 | 0x4000 | +| 32 | blk.3.attn_output.weight | 0x2749f400 | 0x880000 | +| 33 | blk.3.attn_q.weight | 0x27d1f400 | 0x6e0000 | +| 34 | blk.3.attn_v.weight | 0x283ff400 | 0x240000 | +| 35 | blk.3.ffn_down.weight | 0x2863f400 | 0x1324000 | +| 36 | blk.3.ffn_gate.weight | 0x29963400 | 0x157c000 | +| 37 | blk.3.ffn_norm.weight | 0x2aedf400 | 0x4000 | +| 38 | blk.3.ffn_up.weight | 0x2aee3400 | 0x157c000 | +| 39 | blk.4.attn_k.weight | 0x2c45f400 | 0x1b8000 | +| 40 | blk.4.attn_norm.weight | 0x2c617400 | 0x4000 | +| 41 | blk.4.attn_output.weight | 0x2c61b400 | 0x880000 | +| 42 | blk.4.attn_q.weight | 0x2ce9b400 | 0x6e0000 | +| 43 | blk.4.attn_v.weight | 0x2d57b400 | 0x240000 | +| 44 | blk.4.ffn_down.weight | 0x2d7bb400 | 0x157c000 | +| 45 | blk.4.ffn_gate.weight | 0x2ed37400 | 0x157c000 | +| 46 | blk.4.ffn_norm.weight | 0x302b3400 | 0x4000 | +| 47 | blk.4.ffn_up.weight | 0x302b7400 | 0x157c000 | +| 48 | blk.5.attn_k.weight | 0x31833400 | 0x1b8000 | +| 49 | blk.5.attn_norm.weight | 0x319eb400 | 0x4000 | +| 50 | blk.5.attn_output.weight | 0x319ef400 | 0x880000 | +| 51 | blk.5.attn_q.weight | 0x3226f400 | 0x6e0000 | +| 52 | 
blk.5.attn_v.weight | 0x3294f400 | 0x240000 | +| 53 | blk.5.ffn_down.weight | 0x32b8f400 | 0x1324000 | +| 54 | blk.5.ffn_gate.weight | 0x33eb3400 | 0x157c000 | +| 55 | blk.5.ffn_norm.weight | 0x3542f400 | 0x4000 | +| 56 | blk.5.ffn_up.weight | 0x35433400 | 0x157c000 | +| 57 | blk.6.attn_k.weight | 0x369af400 | 0x1b8000 | +| 58 | blk.6.attn_norm.weight | 0x36b67400 | 0x4000 | +| 59 | blk.6.attn_output.weight | 0x36b6b400 | 0x880000 | +| 60 | blk.6.attn_q.weight | 0x373eb400 | 0x6e0000 | +| 61 | blk.6.attn_v.weight | 0x37acb400 | 0x240000 | +| 62 | blk.6.ffn_down.weight | 0x37d0b400 | 0x157c000 | +| 63 | blk.6.ffn_gate.weight | 0x39287400 | 0x1324000 | +| 64 | blk.6.ffn_norm.weight | 0x3a5ab400 | 0x4000 | +| 65 | blk.6.ffn_up.weight | 0x3a5af400 | 0x157c000 | +| 66 | blk.7.attn_k.weight | 0x3bb2b400 | 0x1b8000 | +| 67 | blk.7.attn_norm.weight | 0x3bce3400 | 0x4000 | +| 68 | blk.7.attn_output.weight | 0x3bce7400 | 0x880000 | +| 69 | blk.7.attn_q.weight | 0x3c567400 | 0x6e0000 | +| 70 | blk.7.attn_v.weight | 0x3cc47400 | 0x240000 | +| 71 | blk.7.ffn_down.weight | 0x3ce87400 | 0x157c000 | +| 72 | blk.7.ffn_gate.weight | 0x3e403400 | 0x1324000 | +| 73 | blk.7.ffn_norm.weight | 0x3f727400 | 0x4000 | +| 74 | blk.7.ffn_up.weight | 0x3f72b400 | 0x157c000 | +| 75 | blk.8.attn_k.weight | 0x40ca7400 | 0x1b8000 | +| 76 | blk.8.attn_norm.weight | 0x40e5f400 | 0x4000 | +| 77 | blk.8.attn_output.weight | 0x40e63400 | 0x880000 | +| 78 | blk.8.attn_q.weight | 0x416e3400 | 0x6e0000 | +| 79 | blk.8.attn_v.weight | 0x41dc3400 | 0x240000 | +| 80 | blk.8.ffn_down.weight | 0x42003400 | 0x157c000 | +| 81 | blk.8.ffn_gate.weight | 0x4357f400 | 0x1324000 | +| 82 | blk.8.ffn_norm.weight | 0x448a3400 | 0x4000 | +| 83 | blk.8.ffn_up.weight | 0x448a7400 | 0x157c000 | +| 84 | blk.9.attn_k.weight | 0x45e23400 | 0x1b8000 | +| 85 | blk.9.attn_norm.weight | 0x45fdb400 | 0x4000 | +| 86 | blk.9.attn_output.weight | 0x45fdf400 | 0x880000 | +| 87 | blk.9.attn_q.weight | 0x4685f400 | 0x6e0000 | +| 88 | 
blk.9.attn_v.weight | 0x46f3f400 | 0x240000 | +| 89 | blk.9.ffn_down.weight | 0x4717f400 | 0x157c000 | +| 90 | blk.9.ffn_gate.weight | 0x486fb400 | 0x1324000 | +| 91 | blk.9.ffn_norm.weight | 0x49a1f400 | 0x4000 | +| 92 | blk.9.ffn_up.weight | 0x49a23400 | 0x157c000 | +| 93 | blk.10.attn_k.weight | 0x4af9f400 | 0x1b8000 | +| 94 | blk.10.attn_norm.weight | 0x4b157400 | 0x4000 | +| 95 | blk.10.attn_output.weight | 0x4b15b400 | 0x880000 | +| 96 | blk.10.attn_q.weight | 0x4b9db400 | 0x6e0000 | +| 97 | blk.10.attn_v.weight | 0x4c0bb400 | 0x240000 | +| 98 | blk.10.ffn_down.weight | 0x4c2fb400 | 0x157c000 | +| 99 | blk.10.ffn_gate.weight | 0x4d877400 | 0x1324000 | +| 100 | blk.10.ffn_norm.weight | 0x4eb9b400 | 0x4000 | +| 101 | blk.10.ffn_up.weight | 0x4eb9f400 | 0x157c000 | +| 102 | blk.11.attn_k.weight | 0x5011b400 | 0x1b8000 | +| 103 | blk.11.attn_norm.weight | 0x502d3400 | 0x4000 | +| 104 | blk.11.attn_output.weight | 0x502d7400 | 0x880000 | +| 105 | blk.11.attn_q.weight | 0x50b57400 | 0x6e0000 | +| 106 | blk.11.attn_v.weight | 0x51237400 | 0x220000 | +| 107 | blk.11.ffn_down.weight | 0x51457400 | 0x157c000 | +| 108 | blk.11.ffn_gate.weight | 0x529d3400 | 0x1324000 | +| 109 | blk.11.ffn_norm.weight | 0x53cf7400 | 0x4000 | +| 110 | blk.11.ffn_up.weight | 0x53cfb400 | 0x157c000 | +| 111 | blk.12.attn_k.weight | 0x55277400 | 0x1b8000 | +| 112 | blk.12.attn_norm.weight | 0x5542f400 | 0x4000 | +| 113 | blk.12.attn_output.weight | 0x55433400 | 0x880000 | +| 114 | blk.12.attn_q.weight | 0x55cb3400 | 0x6e0000 | +| 115 | blk.12.attn_v.weight | 0x56393400 | 0x240000 | +| 116 | blk.12.ffn_down.weight | 0x565d3400 | 0x157c000 | +| 117 | blk.12.ffn_gate.weight | 0x57b4f400 | 0x1324000 | +| 118 | blk.12.ffn_norm.weight | 0x58e73400 | 0x4000 | +| 119 | blk.12.ffn_up.weight | 0x58e77400 | 0x157c000 | +| 120 | blk.13.attn_k.weight | 0x5a3f3400 | 0x1b8000 | +| 121 | blk.13.attn_norm.weight | 0x5a5ab400 | 0x4000 | +| 122 | blk.13.attn_output.weight | 0x5a5af400 | 0x880000 | +| 123 | 
blk.13.attn_q.weight | 0x5ae2f400 | 0x6e0000 | +| 124 | blk.13.attn_v.weight | 0x5b50f400 | 0x240000 | +| 125 | blk.13.ffn_down.weight | 0x5b74f400 | 0x157c000 | +| 126 | blk.13.ffn_gate.weight | 0x5cccb400 | 0x1324000 | +| 127 | blk.13.ffn_norm.weight | 0x5dfef400 | 0x4000 | +| 128 | blk.13.ffn_up.weight | 0x5dff3400 | 0x157c000 | +| 129 | blk.14.attn_k.weight | 0x5f56f400 | 0x1b8000 | +| 130 | blk.14.attn_norm.weight | 0x5f727400 | 0x4000 | +| 131 | blk.14.attn_output.weight | 0x5f72b400 | 0x880000 | +| 132 | blk.14.attn_q.weight | 0x5ffab400 | 0x6e0000 | +| 133 | blk.14.attn_v.weight | 0x6068b400 | 0x240000 | +| 134 | blk.14.ffn_down.weight | 0x608cb400 | 0x157c000 | +| 135 | blk.14.ffn_gate.weight | 0x61e47400 | 0x1324000 | +| 136 | blk.14.ffn_norm.weight | 0x6316b400 | 0x4000 | +| 137 | blk.14.ffn_up.weight | 0x6316f400 | 0x157c000 | +| 138 | blk.15.attn_k.weight | 0x646eb400 | 0x1b8000 | +| 139 | blk.15.attn_norm.weight | 0x648a3400 | 0x4000 | +| 140 | blk.15.attn_output.weight | 0x648a7400 | 0x880000 | +| 141 | blk.15.attn_q.weight | 0x65127400 | 0x6e0000 | +| 142 | blk.15.attn_v.weight | 0x65807400 | 0x240000 | +| 143 | blk.15.ffn_down.weight | 0x65a47400 | 0x157c000 | +| 144 | blk.15.ffn_gate.weight | 0x66fc3400 | 0x157c000 | +| 145 | blk.15.ffn_norm.weight | 0x6853f400 | 0x4000 | +| 146 | blk.15.ffn_up.weight | 0x68543400 | 0x157c000 | +| 147 | blk.16.attn_k.weight | 0x69abf400 | 0x1b8000 | +| 148 | blk.16.attn_norm.weight | 0x69c77400 | 0x4000 | +| 149 | blk.16.attn_output.weight | 0x69c7b400 | 0x880000 | +| 150 | blk.16.attn_q.weight | 0x6a4fb400 | 0x6e0000 | +| 151 | blk.16.attn_v.weight | 0x6abdb400 | 0x240000 | +| 152 | blk.16.ffn_down.weight | 0x6ae1b400 | 0x157c000 | +| 153 | blk.16.ffn_gate.weight | 0x6c397400 | 0x1324000 | +| 154 | blk.16.ffn_norm.weight | 0x6d6bb400 | 0x4000 | +| 155 | blk.16.ffn_up.weight | 0x6d6bf400 | 0x157c000 | +| 156 | blk.17.attn_k.weight | 0x6ec3b400 | 0x1b8000 | +| 157 | blk.17.attn_norm.weight | 0x6edf3400 | 0x4000 | 
+| 158 | blk.17.attn_output.weight | 0x6edf7400 | 0x880000 | +| 159 | blk.17.attn_q.weight | 0x6f677400 | 0x6e0000 | +| 160 | blk.17.attn_v.weight | 0x6fd57400 | 0x240000 | +| 161 | blk.17.ffn_down.weight | 0x6ff97400 | 0x157c000 | +| 162 | blk.17.ffn_gate.weight | 0x71513400 | 0x1324000 | +| 163 | blk.17.ffn_norm.weight | 0x72837400 | 0x4000 | +| 164 | blk.17.ffn_up.weight | 0x7283b400 | 0x157c000 | +| 165 | blk.18.attn_k.weight | 0x73db7400 | 0x1b8000 | +| 166 | blk.18.attn_norm.weight | 0x73f6f400 | 0x4000 | +| 167 | blk.18.attn_output.weight | 0x73f73400 | 0x880000 | +| 168 | blk.18.attn_q.weight | 0x747f3400 | 0x6e0000 | +| 169 | blk.18.attn_v.weight | 0x74ed3400 | 0x240000 | +| 170 | blk.18.ffn_down.weight | 0x75113400 | 0x157c000 | +| 171 | blk.18.ffn_gate.weight | 0x7668f400 | 0x1324000 | +| 172 | blk.18.ffn_norm.weight | 0x779b3400 | 0x4000 | +| 173 | blk.18.ffn_up.weight | 0x779b7400 | 0x157c000 | +| 174 | blk.19.attn_k.weight | 0x78f33400 | 0x1b8000 | +| 175 | blk.19.attn_norm.weight | 0x790eb400 | 0x4000 | +| 176 | blk.19.attn_output.weight | 0x790ef400 | 0x880000 | +| 177 | blk.19.attn_q.weight | 0x7996f400 | 0x6e0000 | +| 178 | blk.19.attn_v.weight | 0x7a04f400 | 0x240000 | +| 179 | blk.19.ffn_down.weight | 0x7a28f400 | 0x157c000 | +| 180 | blk.19.ffn_gate.weight | 0x7b80b400 | 0x1324000 | +| 181 | blk.19.ffn_norm.weight | 0x7cb2f400 | 0x4000 | +| 182 | blk.19.ffn_up.weight | 0x7cb33400 | 0x157c000 | +| 183 | blk.20.attn_k.weight | 0x7e0af400 | 0x1b8000 | +| 184 | blk.20.attn_norm.weight | 0x7e267400 | 0x4000 | +| 185 | blk.20.attn_output.weight | 0x7e26b400 | 0x880000 | +| 186 | blk.20.attn_q.weight | 0x7eaeb400 | 0x6e0000 | +| 187 | blk.20.attn_v.weight | 0x7f1cb400 | 0x240000 | +| 188 | blk.20.ffn_down.weight | 0x7f40b400 | 0x157c000 | +| 189 | blk.20.ffn_gate.weight | 0x80987400 | 0x1324000 | +| 190 | blk.20.ffn_norm.weight | 0x81cab400 | 0x4000 | +| 191 | blk.20.ffn_up.weight | 0x81caf400 | 0x157c000 | +| 192 | blk.21.attn_k.weight | 0x8322b400 | 
0x1b8000 | +| 193 | blk.21.attn_norm.weight | 0x833e3400 | 0x4000 | +| 194 | blk.21.attn_output.weight | 0x833e7400 | 0x880000 | +| 195 | blk.21.attn_q.weight | 0x83c67400 | 0x6e0000 | +| 196 | blk.21.attn_v.weight | 0x84347400 | 0x240000 | +| 197 | blk.21.ffn_down.weight | 0x84587400 | 0x157c000 | +| 198 | blk.21.ffn_gate.weight | 0x85b03400 | 0x1324000 | +| 199 | blk.21.ffn_norm.weight | 0x86e27400 | 0x4000 | +| 200 | blk.21.ffn_up.weight | 0x86e2b400 | 0x157c000 | +| 201 | blk.22.attn_k.weight | 0x883a7400 | 0x1b8000 | +| 202 | blk.22.attn_norm.weight | 0x8855f400 | 0x4000 | +| 203 | blk.22.attn_output.weight | 0x88563400 | 0x880000 | +| 204 | blk.22.attn_q.weight | 0x88de3400 | 0x6e0000 | +| 205 | blk.22.attn_v.weight | 0x894c3400 | 0x240000 | +| 206 | blk.22.ffn_down.weight | 0x89703400 | 0x157c000 | +| 207 | blk.22.ffn_gate.weight | 0x8ac7f400 | 0x1324000 | +| 208 | blk.22.ffn_norm.weight | 0x8bfa3400 | 0x4000 | +| 209 | blk.22.ffn_up.weight | 0x8bfa7400 | 0x157c000 | +| 210 | blk.23.attn_k.weight | 0x8d523400 | 0x1b8000 | +| 211 | blk.23.attn_norm.weight | 0x8d6db400 | 0x4000 | +| 212 | blk.23.attn_output.weight | 0x8d6df400 | 0x880000 | +| 213 | blk.23.attn_q.weight | 0x8df5f400 | 0x6e0000 | +| 214 | blk.23.attn_v.weight | 0x8e63f400 | 0x240000 | +| 215 | blk.23.ffn_down.weight | 0x8e87f400 | 0x157c000 | +| 216 | blk.23.ffn_gate.weight | 0x8fdfb400 | 0x1324000 | +| 217 | blk.23.ffn_norm.weight | 0x9111f400 | 0x4000 | +| 218 | blk.23.ffn_up.weight | 0x91123400 | 0x157c000 | +| 219 | blk.24.attn_k.weight | 0x9269f400 | 0x1b8000 | +| 220 | blk.24.attn_norm.weight | 0x92857400 | 0x4000 | +| 221 | blk.24.attn_output.weight | 0x9285b400 | 0x880000 | +| 222 | blk.24.attn_q.weight | 0x930db400 | 0x6e0000 | +| 223 | blk.24.attn_v.weight | 0x937bb400 | 0x240000 | +| 224 | blk.24.ffn_down.weight | 0x939fb400 | 0x157c000 | +| 225 | blk.24.ffn_gate.weight | 0x94f77400 | 0x1324000 | +| 226 | blk.24.ffn_norm.weight | 0x9629b400 | 0x4000 | +| 227 | blk.24.ffn_up.weight | 
0x9629f400 | 0x157c000 | +| 228 | blk.25.attn_k.weight | 0x9781b400 | 0x1b8000 | +| 229 | blk.25.attn_norm.weight | 0x979d3400 | 0x4000 | +| 230 | blk.25.attn_output.weight | 0x979d7400 | 0x880000 | +| 231 | blk.25.attn_q.weight | 0x98257400 | 0x6e0000 | +| 232 | blk.25.attn_v.weight | 0x98937400 | 0x240000 | +| 233 | blk.25.ffn_down.weight | 0x98b77400 | 0x157c000 | +| 234 | blk.25.ffn_gate.weight | 0x9a0f3400 | 0x1324000 | +| 235 | blk.25.ffn_norm.weight | 0x9b417400 | 0x4000 | +| 236 | blk.25.ffn_up.weight | 0x9b41b400 | 0x157c000 | +| 237 | blk.26.attn_k.weight | 0x9c997400 | 0x1b8000 | +| 238 | blk.26.attn_norm.weight | 0x9cb4f400 | 0x4000 | +| 239 | blk.26.attn_output.weight | 0x9cb53400 | 0x880000 | +| 240 | blk.26.attn_q.weight | 0x9d3d3400 | 0x6e0000 | +| 241 | blk.26.attn_v.weight | 0x9dab3400 | 0x240000 | +| 242 | blk.26.ffn_down.weight | 0x9dcf3400 | 0x157c000 | +| 243 | blk.26.ffn_gate.weight | 0x9f26f400 | 0x1324000 | +| 244 | blk.26.ffn_norm.weight | 0xa0593400 | 0x4000 | +| 245 | blk.26.ffn_up.weight | 0xa0597400 | 0x157c000 | +| 246 | blk.27.attn_k.weight | 0xa1b13400 | 0x1b8000 | +| 247 | blk.27.attn_norm.weight | 0xa1ccb400 | 0x4000 | +| 248 | blk.27.attn_output.weight | 0xa1ccf400 | 0x880000 | +| 249 | blk.27.attn_q.weight | 0xa254f400 | 0x6e0000 | +| 250 | blk.27.attn_v.weight | 0xa2c2f400 | 0x240000 | +| 251 | blk.27.ffn_down.weight | 0xa2e6f400 | 0x157c000 | +| 252 | blk.27.ffn_gate.weight | 0xa43eb400 | 0x157c000 | +| 253 | blk.27.ffn_norm.weight | 0xa5967400 | 0x4000 | +| 254 | blk.27.ffn_up.weight | 0xa596b400 | 0x157c000 | +| 255 | blk.28.attn_k.weight | 0xa6ee7400 | 0x1b8000 | +| 256 | blk.28.attn_norm.weight | 0xa709f400 | 0x4000 | +| 257 | blk.28.attn_output.weight | 0xa70a3400 | 0x880000 | +| 258 | blk.28.attn_q.weight | 0xa7923400 | 0x6e0000 | +| 259 | blk.28.attn_v.weight | 0xa8003400 | 0x240000 | +| 260 | blk.28.ffn_down.weight | 0xa8243400 | 0x157c000 | +| 261 | blk.28.ffn_gate.weight | 0xa97bf400 | 0x157c000 | +| 262 | 
blk.28.ffn_norm.weight | 0xaad3b400 | 0x4000 | +| 263 | blk.28.ffn_up.weight | 0xaad3f400 | 0x157c000 | +| 264 | blk.29.attn_k.weight | 0xac2bb400 | 0x1b8000 | +| 265 | blk.29.attn_norm.weight | 0xac473400 | 0x4000 | +| 266 | blk.29.attn_output.weight | 0xac477400 | 0x880000 | +| 267 | blk.29.attn_q.weight | 0xaccf7400 | 0x6e0000 | +| 268 | blk.29.attn_v.weight | 0xad3d7400 | 0x240000 | +| 269 | blk.29.ffn_down.weight | 0xad617400 | 0x157c000 | +| 270 | blk.29.ffn_gate.weight | 0xaeb93400 | 0x157c000 | +| 271 | blk.29.ffn_norm.weight | 0xb010f400 | 0x4000 | +| 272 | blk.29.ffn_up.weight | 0xb0113400 | 0x157c000 | +| 273 | blk.30.attn_k.weight | 0xb168f400 | 0x1b8000 | +| 274 | blk.30.attn_norm.weight | 0xb1847400 | 0x4000 | +| 275 | blk.30.attn_output.weight | 0xb184b400 | 0x880000 | +| 276 | blk.30.attn_q.weight | 0xb20cb400 | 0x6e0000 | +| 277 | blk.30.attn_v.weight | 0xb27ab400 | 0x240000 | +| 278 | blk.30.ffn_down.weight | 0xb29eb400 | 0x157c000 | +| 279 | blk.30.ffn_gate.weight | 0xb3f67400 | 0x157c000 | +| 280 | blk.30.ffn_norm.weight | 0xb54e3400 | 0x4000 | +| 281 | blk.30.ffn_up.weight | 0xb54e7400 | 0x157c000 | +| 282 | blk.31.attn_k.weight | 0xb6a63400 | 0x1b8000 | +| 283 | blk.31.attn_norm.weight | 0xb6c1b400 | 0x4000 | +| 284 | blk.31.attn_output.weight | 0xb6c1f400 | 0x880000 | +| 285 | blk.31.attn_q.weight | 0xb749f400 | 0x6e0000 | +| 286 | blk.31.attn_v.weight | 0xb7b7f400 | 0x240000 | +| 287 | blk.31.ffn_down.weight | 0xb7dbf400 | 0x157c000 | +| 288 | blk.31.ffn_gate.weight | 0xb933b400 | 0x157c000 | +| 289 | blk.31.ffn_norm.weight | 0xba8b7400 | 0x4000 | +| 290 | blk.31.ffn_up.weight | 0xba8bb400 | 0x157c000 | +| 291 | blk.32.attn_k.weight | 0xbbe37400 | 0x1b8000 | +| 292 | blk.32.attn_norm.weight | 0xbbfef400 | 0x4000 | +| 293 | blk.32.attn_output.weight | 0xbbff3400 | 0x880000 | +| 294 | blk.32.attn_q.weight | 0xbc873400 | 0x6e0000 | +| 295 | blk.32.attn_v.weight | 0xbcf53400 | 0x240000 | +| 296 | blk.32.ffn_down.weight | 0xbd193400 | 0x157c000 | 
+| 297 | blk.32.ffn_gate.weight | 0xbe70f400 | 0x157c000 | +| 298 | blk.32.ffn_norm.weight | 0xbfc8b400 | 0x4000 | +| 299 | blk.32.ffn_up.weight | 0xbfc8f400 | 0x157c000 | +| 300 | blk.33.attn_k.weight | 0xc120b400 | 0x1b8000 | +| 301 | blk.33.attn_norm.weight | 0xc13c3400 | 0x4000 | +| 302 | blk.33.attn_output.weight | 0xc13c7400 | 0x880000 | +| 303 | blk.33.attn_q.weight | 0xc1c47400 | 0x6e0000 | +| 304 | blk.33.attn_v.weight | 0xc2327400 | 0x240000 | +| 305 | blk.33.ffn_down.weight | 0xc2567400 | 0x157c000 | +| 306 | blk.33.ffn_gate.weight | 0xc3ae3400 | 0x157c000 | +| 307 | blk.33.ffn_norm.weight | 0xc505f400 | 0x4000 | +| 308 | blk.33.ffn_up.weight | 0xc5063400 | 0x157c000 | +| 309 | blk.34.attn_k.weight | 0xc65df400 | 0x1b8000 | +| 310 | blk.34.attn_norm.weight | 0xc6797400 | 0x4000 | +| 311 | blk.34.attn_output.weight | 0xc679b400 | 0x880000 | +| 312 | blk.34.attn_q.weight | 0xc701b400 | 0x6e0000 | +| 313 | blk.34.attn_v.weight | 0xc76fb400 | 0x240000 | +| 314 | blk.34.ffn_down.weight | 0xc793b400 | 0x157c000 | +| 315 | blk.34.ffn_gate.weight | 0xc8eb7400 | 0x1324000 | +| 316 | blk.34.ffn_norm.weight | 0xca1db400 | 0x4000 | +| 317 | blk.34.ffn_up.weight | 0xca1df400 | 0x157c000 | +| 318 | blk.35.attn_k.weight | 0xcb75b400 | 0x1b8000 | +| 319 | blk.35.attn_norm.weight | 0xcb913400 | 0x4000 | +| 320 | blk.35.attn_output.weight | 0xcb917400 | 0x880000 | +| 321 | blk.35.attn_q.weight | 0xcc197400 | 0x6e0000 | +| 322 | blk.35.attn_v.weight | 0xcc877400 | 0x220000 | +| 323 | blk.35.ffn_down.weight | 0xcca97400 | 0x157c000 | +| 324 | blk.35.ffn_gate.weight | 0xce013400 | 0x157c000 | +| 325 | blk.35.ffn_norm.weight | 0xcf58f400 | 0x4000 | +| 326 | blk.35.ffn_up.weight | 0xcf593400 | 0x157c000 | +| 327 | blk.36.attn_k.weight | 0xd0b0f400 | 0x1b8000 | +| 328 | blk.36.attn_norm.weight | 0xd0cc7400 | 0x4000 | +| 329 | blk.36.attn_output.weight | 0xd0ccb400 | 0x880000 | +| 330 | blk.36.attn_q.weight | 0xd154b400 | 0x6e0000 | +| 331 | blk.36.attn_v.weight | 0xd1c2b400 | 
0x220000 | +| 332 | blk.36.ffn_down.weight | 0xd1e4b400 | 0x157c000 | +| 333 | blk.36.ffn_gate.weight | 0xd33c7400 | 0x157c000 | +| 334 | blk.36.ffn_norm.weight | 0xd4943400 | 0x4000 | +| 335 | blk.36.ffn_up.weight | 0xd4947400 | 0x157c000 | +| 336 | blk.37.attn_k.weight | 0xd5ec3400 | 0x1b8000 | +| 337 | blk.37.attn_norm.weight | 0xd607b400 | 0x4000 | +| 338 | blk.37.attn_output.weight | 0xd607f400 | 0x880000 | +| 339 | blk.37.attn_q.weight | 0xd68ff400 | 0x6e0000 | +| 340 | blk.37.attn_v.weight | 0xd6fdf400 | 0x220000 | +| 341 | blk.37.ffn_down.weight | 0xd71ff400 | 0x157c000 | +| 342 | blk.37.ffn_gate.weight | 0xd877b400 | 0x157c000 | +| 343 | blk.37.ffn_norm.weight | 0xd9cf7400 | 0x4000 | +| 344 | blk.37.ffn_up.weight | 0xd9cfb400 | 0x157c000 | +| 345 | blk.38.attn_k.weight | 0xdb277400 | 0x1b8000 | +| 346 | blk.38.attn_norm.weight | 0xdb42f400 | 0x4000 | +| 347 | blk.38.attn_output.weight | 0xdb433400 | 0x880000 | +| 348 | blk.38.attn_q.weight | 0xdbcb3400 | 0x6e0000 | +| 349 | blk.38.attn_v.weight | 0xdc393400 | 0x220000 | +| 350 | blk.38.ffn_down.weight | 0xdc5b3400 | 0x157c000 | +| 351 | blk.38.ffn_gate.weight | 0xddb2f400 | 0x157c000 | +| 352 | blk.38.ffn_norm.weight | 0xdf0ab400 | 0x4000 | +| 353 | blk.38.ffn_up.weight | 0xdf0af400 | 0x157c000 | +| 354 | blk.39.attn_k.weight | 0xe062b400 | 0x1b8000 | +| 355 | blk.39.attn_norm.weight | 0xe07e3400 | 0x4000 | +| 356 | blk.39.attn_output.weight | 0xe07e7400 | 0x880000 | +| 357 | blk.39.attn_q.weight | 0xe1067400 | 0x6e0000 | +| 358 | blk.39.attn_v.weight | 0xe1747400 | 0x220000 | +| 359 | blk.39.ffn_down.weight | 0xe1967400 | 0x1324000 | +| 360 | blk.39.ffn_gate.weight | 0xe2c8b400 | 0x157c000 | +| 361 | blk.39.ffn_norm.weight | 0xe4207400 | 0x4000 | +| 362 | blk.39.ffn_up.weight | 0xe420b400 | 0x157c000 | + +### Base Tensor Group : ~822M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------|:---------------------------------|:------------------|:----------------------|:-----|--------:| +| 0 | output.weight | Output (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q4_K | 4.5000 | +| 1 | output_norm.weight | Output Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 2 | token_embd.weight | Token Embedding (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q3_K | 3.4375 | + +- Total elements in base: (~822M) 822087680 +- Percentage of total elements: 9.35% +- Bits per Weight (BPW) for base: 3.9689 bits + + +### Block 0 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 3 | blk.0.attn_k.weight | Block 0 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ2_XS | 2.3125 | +| 4 | blk.0.attn_norm.weight | Block 0 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 5 | blk.0.attn_output.weight | Block 0 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_XXS | 3.0625 | +| 6 | blk.0.attn_q.weight | Block 0 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ2_XS | 2.3125 | +| 7 | blk.0.attn_v.weight | Block 0 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_XXS | 3.0625 | +| 8 | blk.0.ffn_down.weight | Block 0 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q2_K | 2.6250 | +| 9 | blk.0.ffn_gate.weight | Block 0 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ2_XXS | 2.0625 | +| 10 | blk.0.ffn_norm.weight | Block 0 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 11 | blk.0.ffn_up.weight | Block 0 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | + +- Total elements in 
blk.0: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.0: 2.6065 bits + + +### Block 1 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 12 | blk.1.attn_k.weight | Block 1 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_XXS | 3.0625 | +| 13 | blk.1.attn_norm.weight | Block 1 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 14 | blk.1.attn_output.weight | Block 1 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 15 | blk.1.attn_q.weight | Block 1 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 16 | blk.1.attn_v.weight | Block 1 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 17 | blk.1.ffn_down.weight | Block 1 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 18 | blk.1.ffn_gate.weight | Block 1 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 19 | blk.1.ffn_norm.weight | Block 1 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 20 | blk.1.ffn_up.weight | Block 1 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.1: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.1: 3.6367 bits + + +### Block 2 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 21 | blk.2.attn_k.weight | Block 2 Attention 
Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 22 | blk.2.attn_norm.weight | Block 2 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 23 | blk.2.attn_output.weight | Block 2 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 24 | blk.2.attn_q.weight | Block 2 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 25 | blk.2.attn_v.weight | Block 2 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 26 | blk.2.ffn_down.weight | Block 2 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 27 | blk.2.ffn_gate.weight | Block 2 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 28 | blk.2.ffn_norm.weight | Block 2 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 29 | blk.2.ffn_up.weight | Block 2 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.2: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.2: 3.6393 bits + + +### Block 3 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 30 | blk.3.attn_k.weight | Block 3 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_XXS | 3.0625 | +| 31 | blk.3.attn_norm.weight | Block 3 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 32 | blk.3.attn_output.weight | Block 3 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 33 | blk.3.attn_q.weight | Block 3 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 34 | 
blk.3.attn_v.weight | Block 3 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 35 | blk.3.ffn_down.weight | Block 3 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_XXS | 3.0625 | +| 36 | blk.3.ffn_gate.weight | Block 3 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 37 | blk.3.ffn_norm.weight | Block 3 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 38 | blk.3.ffn_up.weight | Block 3 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.3: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.3: 3.4229 bits + + +### Block 4 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 39 | blk.4.attn_k.weight | Block 4 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 40 | blk.4.attn_norm.weight | Block 4 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 41 | blk.4.attn_output.weight | Block 4 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 42 | blk.4.attn_q.weight | Block 4 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 43 | blk.4.attn_v.weight | Block 4 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 44 | blk.4.ffn_down.weight | Block 4 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 45 | blk.4.ffn_gate.weight | Block 4 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 46 | blk.4.ffn_norm.weight | Block 4 Feed-Forward Network Normalization (W) | ( 
~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 47 | blk.4.ffn_up.weight | Block 4 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.4: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.4: 3.5295 bits + + +### Block 5 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 48 | blk.5.attn_k.weight | Block 5 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 49 | blk.5.attn_norm.weight | Block 5 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 50 | blk.5.attn_output.weight | Block 5 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 51 | blk.5.attn_q.weight | Block 5 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 52 | blk.5.attn_v.weight | Block 5 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 53 | blk.5.ffn_down.weight | Block 5 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_XXS | 3.0625 | +| 54 | blk.5.ffn_gate.weight | Block 5 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 55 | blk.5.ffn_norm.weight | Block 5 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 56 | blk.5.ffn_up.weight | Block 5 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.5: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.5: 3.4308 bits + + +### Block 6 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 57 | blk.6.attn_k.weight | Block 6 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 58 | blk.6.attn_norm.weight | Block 6 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 59 | blk.6.attn_output.weight | Block 6 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 60 | blk.6.attn_q.weight | Block 6 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 61 | blk.6.attn_v.weight | Block 6 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 62 | blk.6.ffn_down.weight | Block 6 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 63 | blk.6.ffn_gate.weight | Block 6 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 64 | blk.6.ffn_norm.weight | Block 6 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 65 | blk.6.ffn_up.weight | Block 6 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.6: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.6: 3.4308 bits + + +### Block 7 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 66 | blk.7.attn_k.weight | Block 7 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 67 | blk.7.attn_norm.weight | Block 7 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 68 | blk.7.attn_output.weight | Block 7 Attention Output (W) | (~17M) 
16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 69 | blk.7.attn_q.weight | Block 7 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 70 | blk.7.attn_v.weight | Block 7 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 71 | blk.7.ffn_down.weight | Block 7 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 72 | blk.7.ffn_gate.weight | Block 7 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 73 | blk.7.ffn_norm.weight | Block 7 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 74 | blk.7.ffn_up.weight | Block 7 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.7: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.7: 3.4308 bits + + +### Block 8 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 75 | blk.8.attn_k.weight | Block 8 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 76 | blk.8.attn_norm.weight | Block 8 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 77 | blk.8.attn_output.weight | Block 8 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 78 | blk.8.attn_q.weight | Block 8 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 79 | blk.8.attn_v.weight | Block 8 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 80 | blk.8.ffn_down.weight | Block 8 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 81 | blk.8.ffn_gate.weight | Block 8 
Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 82 | blk.8.ffn_norm.weight | Block 8 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 83 | blk.8.ffn_up.weight | Block 8 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.8: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.8: 3.4308 bits + + +### Block 9 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 84 | blk.9.attn_k.weight | Block 9 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 85 | blk.9.attn_norm.weight | Block 9 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 86 | blk.9.attn_output.weight | Block 9 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 87 | blk.9.attn_q.weight | Block 9 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 88 | blk.9.attn_v.weight | Block 9 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 89 | blk.9.ffn_down.weight | Block 9 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 90 | blk.9.ffn_gate.weight | Block 9 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 91 | blk.9.ffn_norm.weight | Block 9 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 92 | blk.9.ffn_up.weight | Block 9 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.9: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per 
Weight (BPW) for blk.9: 3.4308 bits + + +### Block 10 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 93 | blk.10.attn_k.weight | Block 10 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 94 | blk.10.attn_norm.weight | Block 10 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 95 | blk.10.attn_output.weight | Block 10 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 96 | blk.10.attn_q.weight | Block 10 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 97 | blk.10.attn_v.weight | Block 10 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 98 | blk.10.ffn_down.weight | Block 10 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 99 | blk.10.ffn_gate.weight | Block 10 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 100 | blk.10.ffn_norm.weight | Block 10 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 101 | blk.10.ffn_up.weight | Block 10 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.10: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.10: 3.4308 bits + + +### Block 11 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 102 | blk.11.attn_k.weight | Block 11 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 
| IQ3_S | 3.4375 | +| 103 | blk.11.attn_norm.weight | Block 11 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 104 | blk.11.attn_output.weight | Block 11 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 105 | blk.11.attn_q.weight | Block 11 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 106 | blk.11.attn_v.weight | Block 11 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 107 | blk.11.ffn_down.weight | Block 11 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 108 | blk.11.ffn_gate.weight | Block 11 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 109 | blk.11.ffn_norm.weight | Block 11 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 110 | blk.11.ffn_up.weight | Block 11 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.11: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.11: 3.4255 bits + + +### Block 12 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 111 | blk.12.attn_k.weight | Block 12 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 112 | blk.12.attn_norm.weight | Block 12 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 113 | blk.12.attn_output.weight | Block 12 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 114 | blk.12.attn_q.weight | Block 12 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 115 | blk.12.attn_v.weight | 
Block 12 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 116 | blk.12.ffn_down.weight | Block 12 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 117 | blk.12.ffn_gate.weight | Block 12 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 118 | blk.12.ffn_norm.weight | Block 12 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 119 | blk.12.ffn_up.weight | Block 12 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.12: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.12: 3.4308 bits + + +### Block 13 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 120 | blk.13.attn_k.weight | Block 13 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 121 | blk.13.attn_norm.weight | Block 13 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 122 | blk.13.attn_output.weight | Block 13 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 123 | blk.13.attn_q.weight | Block 13 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 124 | blk.13.attn_v.weight | Block 13 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 125 | blk.13.ffn_down.weight | Block 13 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 126 | blk.13.ffn_gate.weight | Block 13 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 127 | blk.13.ffn_norm.weight | Block 13 Feed-Forward Network 
Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 128 | blk.13.ffn_up.weight | Block 13 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.13: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.13: 3.4308 bits + + +### Block 14 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 129 | blk.14.attn_k.weight | Block 14 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 130 | blk.14.attn_norm.weight | Block 14 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 131 | blk.14.attn_output.weight | Block 14 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 132 | blk.14.attn_q.weight | Block 14 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 133 | blk.14.attn_v.weight | Block 14 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 134 | blk.14.ffn_down.weight | Block 14 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 135 | blk.14.ffn_gate.weight | Block 14 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 136 | blk.14.ffn_norm.weight | Block 14 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 137 | blk.14.ffn_up.weight | Block 14 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.14: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.14: 3.4308 bits + + +### Block 15 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human 
Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 138 | blk.15.attn_k.weight | Block 15 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 139 | blk.15.attn_norm.weight | Block 15 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 140 | blk.15.attn_output.weight | Block 15 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 141 | blk.15.attn_q.weight | Block 15 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 142 | blk.15.attn_v.weight | Block 15 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 143 | blk.15.ffn_down.weight | Block 15 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 144 | blk.15.ffn_gate.weight | Block 15 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 145 | blk.15.ffn_norm.weight | Block 15 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 146 | blk.15.ffn_up.weight | Block 15 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.15: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.15: 3.5295 bits + + +### Block 16 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 147 | blk.16.attn_k.weight | Block 16 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 148 | blk.16.attn_norm.weight | Block 16 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 
1 x 1 | F32 | 32.0000 | +| 149 | blk.16.attn_output.weight | Block 16 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 150 | blk.16.attn_q.weight | Block 16 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 151 | blk.16.attn_v.weight | Block 16 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 152 | blk.16.ffn_down.weight | Block 16 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 153 | blk.16.ffn_gate.weight | Block 16 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 154 | blk.16.ffn_norm.weight | Block 16 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 155 | blk.16.ffn_up.weight | Block 16 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.16: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.16: 3.4308 bits + + +### Block 17 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 156 | blk.17.attn_k.weight | Block 17 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 157 | blk.17.attn_norm.weight | Block 17 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 158 | blk.17.attn_output.weight | Block 17 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 159 | blk.17.attn_q.weight | Block 17 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 160 | blk.17.attn_v.weight | Block 17 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 161 | blk.17.ffn_down.weight | 
Block 17 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 162 | blk.17.ffn_gate.weight | Block 17 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 163 | blk.17.ffn_norm.weight | Block 17 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 164 | blk.17.ffn_up.weight | Block 17 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.17: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.17: 3.4308 bits + + +### Block 18 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 165 | blk.18.attn_k.weight | Block 18 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 166 | blk.18.attn_norm.weight | Block 18 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 167 | blk.18.attn_output.weight | Block 18 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 168 | blk.18.attn_q.weight | Block 18 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 169 | blk.18.attn_v.weight | Block 18 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 170 | blk.18.ffn_down.weight | Block 18 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 171 | blk.18.ffn_gate.weight | Block 18 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 172 | blk.18.ffn_norm.weight | Block 18 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 173 | blk.18.ffn_up.weight | Block 18 Feed-Forward 
Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.18: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.18: 3.4308 bits + + +### Block 19 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 174 | blk.19.attn_k.weight | Block 19 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 175 | blk.19.attn_norm.weight | Block 19 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 176 | blk.19.attn_output.weight | Block 19 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 177 | blk.19.attn_q.weight | Block 19 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 178 | blk.19.attn_v.weight | Block 19 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 179 | blk.19.ffn_down.weight | Block 19 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 180 | blk.19.ffn_gate.weight | Block 19 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 181 | blk.19.ffn_norm.weight | Block 19 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 182 | blk.19.ffn_up.weight | Block 19 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.19: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.19: 3.4308 bits + + +### Block 20 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 183 | blk.20.attn_k.weight | Block 20 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 184 | blk.20.attn_norm.weight | Block 20 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 185 | blk.20.attn_output.weight | Block 20 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 186 | blk.20.attn_q.weight | Block 20 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 187 | blk.20.attn_v.weight | Block 20 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 188 | blk.20.ffn_down.weight | Block 20 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 189 | blk.20.ffn_gate.weight | Block 20 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 190 | blk.20.ffn_norm.weight | Block 20 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 191 | blk.20.ffn_up.weight | Block 20 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.20: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.20: 3.4308 bits + + +### Block 21 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 192 | blk.21.attn_k.weight | Block 21 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 193 | blk.21.attn_norm.weight | Block 21 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 194 | blk.21.attn_output.weight 
| Block 21 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 195 | blk.21.attn_q.weight | Block 21 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 196 | blk.21.attn_v.weight | Block 21 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 197 | blk.21.ffn_down.weight | Block 21 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 198 | blk.21.ffn_gate.weight | Block 21 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 199 | blk.21.ffn_norm.weight | Block 21 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 200 | blk.21.ffn_up.weight | Block 21 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.21: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.21: 3.4308 bits + + +### Block 22 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 201 | blk.22.attn_k.weight | Block 22 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 202 | blk.22.attn_norm.weight | Block 22 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 203 | blk.22.attn_output.weight | Block 22 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 204 | blk.22.attn_q.weight | Block 22 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 205 | blk.22.attn_v.weight | Block 22 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 206 | blk.22.ffn_down.weight | Block 22 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 
12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 207 | blk.22.ffn_gate.weight | Block 22 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 208 | blk.22.ffn_norm.weight | Block 22 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 209 | blk.22.ffn_up.weight | Block 22 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.22: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.22: 3.4308 bits + + +### Block 23 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 210 | blk.23.attn_k.weight | Block 23 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 211 | blk.23.attn_norm.weight | Block 23 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 212 | blk.23.attn_output.weight | Block 23 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 213 | blk.23.attn_q.weight | Block 23 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 214 | blk.23.attn_v.weight | Block 23 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 215 | blk.23.ffn_down.weight | Block 23 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 216 | blk.23.ffn_gate.weight | Block 23 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 217 | blk.23.ffn_norm.weight | Block 23 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 218 | blk.23.ffn_up.weight | Block 23 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | 
IQ3_S | 3.4375 | + +- Total elements in blk.23: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.23: 3.4308 bits + + +### Block 24 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 219 | blk.24.attn_k.weight | Block 24 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 220 | blk.24.attn_norm.weight | Block 24 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 221 | blk.24.attn_output.weight | Block 24 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 222 | blk.24.attn_q.weight | Block 24 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 223 | blk.24.attn_v.weight | Block 24 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 224 | blk.24.ffn_down.weight | Block 24 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 225 | blk.24.ffn_gate.weight | Block 24 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 226 | blk.24.ffn_norm.weight | Block 24 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 227 | blk.24.ffn_up.weight | Block 24 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.24: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.24: 3.4308 bits + + +### Block 25 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 228 | blk.25.attn_k.weight | Block 25 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 229 | blk.25.attn_norm.weight | Block 25 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 230 | blk.25.attn_output.weight | Block 25 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 231 | blk.25.attn_q.weight | Block 25 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 232 | blk.25.attn_v.weight | Block 25 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 233 | blk.25.ffn_down.weight | Block 25 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 234 | blk.25.ffn_gate.weight | Block 25 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 235 | blk.25.ffn_norm.weight | Block 25 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 236 | blk.25.ffn_up.weight | Block 25 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.25: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.25: 3.4308 bits + + +### Block 26 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 237 | blk.26.attn_k.weight | Block 26 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 238 | blk.26.attn_norm.weight | Block 26 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 239 | blk.26.attn_output.weight 
| Block 26 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 240 | blk.26.attn_q.weight | Block 26 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 241 | blk.26.attn_v.weight | Block 26 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 242 | blk.26.ffn_down.weight | Block 26 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 243 | blk.26.ffn_gate.weight | Block 26 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 244 | blk.26.ffn_norm.weight | Block 26 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 245 | blk.26.ffn_up.weight | Block 26 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.26: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.26: 3.4308 bits + + +### Block 27 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 246 | blk.27.attn_k.weight | Block 27 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 247 | blk.27.attn_norm.weight | Block 27 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 248 | blk.27.attn_output.weight | Block 27 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 249 | blk.27.attn_q.weight | Block 27 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 250 | blk.27.attn_v.weight | Block 27 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 251 | blk.27.ffn_down.weight | Block 27 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 
12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 252 | blk.27.ffn_gate.weight | Block 27 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 253 | blk.27.ffn_norm.weight | Block 27 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 254 | blk.27.ffn_up.weight | Block 27 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.27: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.27: 3.5295 bits + + +### Block 28 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 255 | blk.28.attn_k.weight | Block 28 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 256 | blk.28.attn_norm.weight | Block 28 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 257 | blk.28.attn_output.weight | Block 28 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 258 | blk.28.attn_q.weight | Block 28 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 259 | blk.28.attn_v.weight | Block 28 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 260 | blk.28.ffn_down.weight | Block 28 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 261 | blk.28.ffn_gate.weight | Block 28 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 262 | blk.28.ffn_norm.weight | Block 28 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 263 | blk.28.ffn_up.weight | Block 28 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S 
| 3.4375 | + +- Total elements in blk.28: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.28: 3.5295 bits + + +### Block 29 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 264 | blk.29.attn_k.weight | Block 29 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 265 | blk.29.attn_norm.weight | Block 29 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 266 | blk.29.attn_output.weight | Block 29 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 267 | blk.29.attn_q.weight | Block 29 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 268 | blk.29.attn_v.weight | Block 29 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 269 | blk.29.ffn_down.weight | Block 29 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 270 | blk.29.ffn_gate.weight | Block 29 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 271 | blk.29.ffn_norm.weight | Block 29 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 272 | blk.29.ffn_up.weight | Block 29 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.29: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.29: 3.5295 bits + + +### Block 30 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 273 | blk.30.attn_k.weight | Block 30 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 274 | blk.30.attn_norm.weight | Block 30 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 275 | blk.30.attn_output.weight | Block 30 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 276 | blk.30.attn_q.weight | Block 30 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 277 | blk.30.attn_v.weight | Block 30 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 278 | blk.30.ffn_down.weight | Block 30 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 279 | blk.30.ffn_gate.weight | Block 30 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 280 | blk.30.ffn_norm.weight | Block 30 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 281 | blk.30.ffn_up.weight | Block 30 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.30: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.30: 3.5295 bits + + +### Block 31 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 282 | blk.31.attn_k.weight | Block 31 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 283 | blk.31.attn_norm.weight | Block 31 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 284 | blk.31.attn_output.weight | 
Block 31 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 285 | blk.31.attn_q.weight | Block 31 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 286 | blk.31.attn_v.weight | Block 31 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 287 | blk.31.ffn_down.weight | Block 31 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 288 | blk.31.ffn_gate.weight | Block 31 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 289 | blk.31.ffn_norm.weight | Block 31 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 290 | blk.31.ffn_up.weight | Block 31 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.31: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.31: 3.5295 bits + + +### Block 32 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 291 | blk.32.attn_k.weight | Block 32 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 292 | blk.32.attn_norm.weight | Block 32 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 293 | blk.32.attn_output.weight | Block 32 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 294 | blk.32.attn_q.weight | Block 32 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 295 | blk.32.attn_v.weight | Block 32 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 296 | blk.32.ffn_down.weight | Block 32 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 
12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 297 | blk.32.ffn_gate.weight | Block 32 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 298 | blk.32.ffn_norm.weight | Block 32 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 299 | blk.32.ffn_up.weight | Block 32 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.32: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.32: 3.5295 bits + + +### Block 33 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 300 | blk.33.attn_k.weight | Block 33 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 301 | blk.33.attn_norm.weight | Block 33 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 302 | blk.33.attn_output.weight | Block 33 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 303 | blk.33.attn_q.weight | Block 33 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 304 | blk.33.attn_v.weight | Block 33 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 305 | blk.33.ffn_down.weight | Block 33 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 306 | blk.33.ffn_gate.weight | Block 33 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 307 | blk.33.ffn_norm.weight | Block 33 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 308 | blk.33.ffn_up.weight | Block 33 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S 
| 3.4375 | + +- Total elements in blk.33: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.33: 3.5295 bits + + +### Block 34 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 309 | blk.34.attn_k.weight | Block 34 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 310 | blk.34.attn_norm.weight | Block 34 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 311 | blk.34.attn_output.weight | Block 34 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 312 | blk.34.attn_q.weight | Block 34 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 313 | blk.34.attn_v.weight | Block 34 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 314 | blk.34.ffn_down.weight | Block 34 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 315 | blk.34.ffn_gate.weight | Block 34 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 316 | blk.34.ffn_norm.weight | Block 34 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 317 | blk.34.ffn_up.weight | Block 34 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.34: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.34: 3.4308 bits + + +### Block 35 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 318 | blk.35.attn_k.weight | Block 35 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 319 | blk.35.attn_norm.weight | Block 35 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 320 | blk.35.attn_output.weight | Block 35 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 321 | blk.35.attn_q.weight | Block 35 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 322 | blk.35.attn_v.weight | Block 35 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 323 | blk.35.ffn_down.weight | Block 35 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 324 | blk.35.ffn_gate.weight | Block 35 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 325 | blk.35.ffn_norm.weight | Block 35 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 326 | blk.35.ffn_up.weight | Block 35 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.35: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.35: 3.5242 bits + + +### Block 36 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 327 | blk.36.attn_k.weight | Block 36 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 328 | blk.36.attn_norm.weight | Block 36 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 329 | blk.36.attn_output.weight | 
Block 36 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 330 | blk.36.attn_q.weight | Block 36 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 331 | blk.36.attn_v.weight | Block 36 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 332 | blk.36.ffn_down.weight | Block 36 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 333 | blk.36.ffn_gate.weight | Block 36 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 334 | blk.36.ffn_norm.weight | Block 36 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 335 | blk.36.ffn_up.weight | Block 36 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.36: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.36: 3.5242 bits + + +### Block 37 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 336 | blk.37.attn_k.weight | Block 37 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 337 | blk.37.attn_norm.weight | Block 37 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 338 | blk.37.attn_output.weight | Block 37 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 339 | blk.37.attn_q.weight | Block 37 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 340 | blk.37.attn_v.weight | Block 37 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 341 | blk.37.ffn_down.weight | Block 37 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 
12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 342 | blk.37.ffn_gate.weight | Block 37 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 343 | blk.37.ffn_norm.weight | Block 37 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 344 | blk.37.ffn_up.weight | Block 37 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.37: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.37: 3.5242 bits + + +### Block 38 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 345 | blk.38.attn_k.weight | Block 38 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 346 | blk.38.attn_norm.weight | Block 38 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 347 | blk.38.attn_output.weight | Block 38 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 348 | blk.38.attn_q.weight | Block 38 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 349 | blk.38.attn_v.weight | Block 38 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 350 | blk.38.ffn_down.weight | Block 38 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 351 | blk.38.ffn_gate.weight | Block 38 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 352 | blk.38.ffn_norm.weight | Block 38 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 353 | blk.38.ffn_up.weight | Block 38 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | 
IQ3_S | 3.4375 | + +- Total elements in blk.38: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.38: 3.5242 bits + + +### Block 39 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 354 | blk.39.attn_k.weight | Block 39 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 355 | blk.39.attn_norm.weight | Block 39 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 356 | blk.39.attn_output.weight | Block 39 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 357 | blk.39.attn_q.weight | Block 39 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_S | 3.4375 | +| 358 | blk.39.attn_v.weight | Block 39 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 359 | blk.39.ffn_down.weight | Block 39 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ3_XXS | 3.0625 | +| 360 | blk.39.ffn_gate.weight | Block 39 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | +| 361 | blk.39.ffn_norm.weight | Block 39 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 362 | blk.39.ffn_up.weight | Block 39 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_S | 3.4375 | + +- Total elements in blk.39: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.39: 3.4255 bits + + +Total BPW for granite-4.1-8b-Q3_K.gguf: 3.5000 bits diff --git a/scores/granite-4.1-8b-Q4_K.md b/scores/granite-4.1-8b-Q4_K.md new file mode 100644 index 0000000..2e52bc1 --- /dev/null +++ b/scores/granite-4.1-8b-Q4_K.md @@ -0,0 +1,1247 @@ +# 
granite-4.1-8b-WIP/granite-4.1-8b-Q4_K.gguf - GGUF Internal File Dump + +- Endian: LITTLE endian + +## Key Value Metadata Store + +There are 42 key-value pairs in this file + +| POS | TYPE | Count | Key | Value | +|----:|:---------|-------:|:-----------------------------------------|:--------------------------------------------------------------------| +| 1 | UINT32 | 1 | GGUF.version | 3 | +| 2 | UINT64 | 1 | GGUF.tensor_count | 363 | +| 3 | UINT64 | 1 | GGUF.kv_count | 39 | +| 4 | STRING | 1 | general.architecture | `granite` | +| 5 | STRING | 1 | general.type | `model` | +| 6 | STRING | 1 | general.name | `Granite 4.1 8b` | +| 7 | STRING | 1 | general.basename | `granite-4.1` | +| 8 | STRING | 1 | general.size_label | `8B` | +| 9 | STRING | 1 | general.license | `apache-2.0` | +| 10 | [STRING] | 2 | general.tags | [ `language`, `granite-4.1` ] | +| 11 | UINT32 | 1 | granite.block_count | 40 | +| 12 | UINT32 | 1 | granite.context_length | 131072 | +| 13 | UINT32 | 1 | granite.embedding_length | 4096 | +| 14 | UINT32 | 1 | granite.feed_forward_length | 12800 | +| 15 | UINT32 | 1 | granite.attention.head_count | 32 | +| 16 | UINT32 | 1 | granite.attention.head_count_kv | 8 | +| 17 | FLOAT32 | 1 | granite.rope.freq_base | 10000000.0 | +| 18 | FLOAT32 | 1 | granite.attention.layer_norm_rms_epsilon | 1e-05 | +| 19 | UINT32 | 1 | granite.vocab_size | 100352 | +| 20 | UINT32 | 1 | granite.rope.dimension_count | 128 | +| 21 | FLOAT32 | 1 | granite.attention.scale | 0.0078125 | +| 22 | FLOAT32 | 1 | granite.embedding_scale | 12.0 | +| 23 | FLOAT32 | 1 | granite.residual_scale | 0.22 | +| 24 | FLOAT32 | 1 | granite.logit_scale | 16.0 | +| 25 | STRING | 1 | tokenizer.ggml.model | `gpt2` | +| 26 | STRING | 1 | tokenizer.ggml.pre | `granite-docling` | +| 27 | [STRING] | 100352 | tokenizer.ggml.tokens | [ `!`, `"`, `#`, `$`, `%`, ... ] | +| 28 | [INT32] | 100352 | tokenizer.ggml.token_type | [ 1, 1, 1, 1, 1, 1, 1, ... 
] | +| 29 | [STRING] | 100000 | tokenizer.ggml.merges | [ `Ġ Ġ`, `ĠĠ ĠĠ`, `i n`, `Ġ t`, `ĠĠĠĠ ĠĠĠĠ`, ... ] | +| 30 | UINT32 | 1 | tokenizer.ggml.bos_token_id | 100257 | +| 31 | UINT32 | 1 | tokenizer.ggml.eos_token_id | 100257 | +| 32 | UINT32 | 1 | tokenizer.ggml.unknown_token_id | 100269 | +| 33 | UINT32 | 1 | tokenizer.ggml.padding_token_id | 100256 | +| 34 | BOOL | 1 | tokenizer.ggml.add_bos_token | False | +| 35 | STRING | 1 | tokenizer.chat_template | `{%- set tools_system_message_p`...`end_of_role|>' }}{%- endif %}` | +| 36 | BOOL | 1 | tokenizer.ggml.add_space_prefix | False | +| 37 | UINT32 | 1 | general.quantization_version | 2 | +| 38 | UINT32 | 1 | general.file_type | 15 | +| 39 | STRING | 1 | quantize.imatrix.file | `imatrix/imatrix-granite-4.1-8b-medium.gguf` | +| 40 | STRING | 1 | quantize.imatrix.dataset | `../../datasets/imatrix/combined_all_medium.txt` | +| 41 | UINT32 | 1 | quantize.imatrix.entries_count | 280 | +| 42 | UINT32 | 1 | quantize.imatrix.chunks_count | 3601 | + +## Tensors Overview ~9B Elements + +Total number of elements in all tensors: 8791592960 Elements + +- [granite-4.1-8b-WIP/granite-4.1-8b-Q4\_K.gguf - GGUF Internal File Dump](#granite-41-8b-wipgranite-41-8b-q4_kgguf---gguf-internal-file-dump) + - [Key Value Metadata Store](#key-value-metadata-store) + - [Tensors Overview ~9B Elements](#tensors-overview-9b-elements) + - [Tensor Data Offset](#tensor-data-offset) + - [Base Tensor Group : ~822M Elements](#base-tensor-group--822m-elements) + - [Block 0 Tensor Group : ~199M Elements](#block-0-tensor-group--199m-elements) + - [Block 1 Tensor Group : ~199M Elements](#block-1-tensor-group--199m-elements) + - [Block 2 Tensor Group : ~199M Elements](#block-2-tensor-group--199m-elements) + - [Block 3 Tensor Group : ~199M Elements](#block-3-tensor-group--199m-elements) + - [Block 4 Tensor Group : ~199M Elements](#block-4-tensor-group--199m-elements) + - [Block 5 Tensor Group : ~199M Elements](#block-5-tensor-group--199m-elements) + - 
[Block 6 Tensor Group : ~199M Elements](#block-6-tensor-group--199m-elements) + - [Block 7 Tensor Group : ~199M Elements](#block-7-tensor-group--199m-elements) + - [Block 8 Tensor Group : ~199M Elements](#block-8-tensor-group--199m-elements) + - [Block 9 Tensor Group : ~199M Elements](#block-9-tensor-group--199m-elements) + - [Block 10 Tensor Group : ~199M Elements](#block-10-tensor-group--199m-elements) + - [Block 11 Tensor Group : ~199M Elements](#block-11-tensor-group--199m-elements) + - [Block 12 Tensor Group : ~199M Elements](#block-12-tensor-group--199m-elements) + - [Block 13 Tensor Group : ~199M Elements](#block-13-tensor-group--199m-elements) + - [Block 14 Tensor Group : ~199M Elements](#block-14-tensor-group--199m-elements) + - [Block 15 Tensor Group : ~199M Elements](#block-15-tensor-group--199m-elements) + - [Block 16 Tensor Group : ~199M Elements](#block-16-tensor-group--199m-elements) + - [Block 17 Tensor Group : ~199M Elements](#block-17-tensor-group--199m-elements) + - [Block 18 Tensor Group : ~199M Elements](#block-18-tensor-group--199m-elements) + - [Block 19 Tensor Group : ~199M Elements](#block-19-tensor-group--199m-elements) + - [Block 20 Tensor Group : ~199M Elements](#block-20-tensor-group--199m-elements) + - [Block 21 Tensor Group : ~199M Elements](#block-21-tensor-group--199m-elements) + - [Block 22 Tensor Group : ~199M Elements](#block-22-tensor-group--199m-elements) + - [Block 23 Tensor Group : ~199M Elements](#block-23-tensor-group--199m-elements) + - [Block 24 Tensor Group : ~199M Elements](#block-24-tensor-group--199m-elements) + - [Block 25 Tensor Group : ~199M Elements](#block-25-tensor-group--199m-elements) + - [Block 26 Tensor Group : ~199M Elements](#block-26-tensor-group--199m-elements) + - [Block 27 Tensor Group : ~199M Elements](#block-27-tensor-group--199m-elements) + - [Block 28 Tensor Group : ~199M Elements](#block-28-tensor-group--199m-elements) + - [Block 29 Tensor Group : ~199M 
Elements](#block-29-tensor-group--199m-elements) + - [Block 30 Tensor Group : ~199M Elements](#block-30-tensor-group--199m-elements) + - [Block 31 Tensor Group : ~199M Elements](#block-31-tensor-group--199m-elements) + - [Block 32 Tensor Group : ~199M Elements](#block-32-tensor-group--199m-elements) + - [Block 33 Tensor Group : ~199M Elements](#block-33-tensor-group--199m-elements) + - [Block 34 Tensor Group : ~199M Elements](#block-34-tensor-group--199m-elements) + - [Block 35 Tensor Group : ~199M Elements](#block-35-tensor-group--199m-elements) + - [Block 36 Tensor Group : ~199M Elements](#block-36-tensor-group--199m-elements) + - [Block 37 Tensor Group : ~199M Elements](#block-37-tensor-group--199m-elements) + - [Block 38 Tensor Group : ~199M Elements](#block-38-tensor-group--199m-elements) + - [Block 39 Tensor Group : ~199M Elements](#block-39-tensor-group--199m-elements) + +### Tensor Data Offset + +This table contains the offset and data segment relative to start of file + +| T_ID | Tensor Layer Name | Data Offset (B) | Data Size (B) | +|-----:|:--------------------------|-----------------:|-----------------:| +| 0 | output.weight | 0x367400 | 0x10d80000 | +| 1 | output_norm.weight | 0x110e7400 | 0x4000 | +| 2 | token_embd.weight | 0x110eb400 | 0xdc80000 | +| 3 | blk.0.attn_k.weight | 0x1ed6b400 | 0x1b8000 | +| 4 | blk.0.attn_norm.weight | 0x1ef23400 | 0x4000 | +| 5 | blk.0.attn_output.weight | 0x1ef27400 | 0x900000 | +| 6 | blk.0.attn_q.weight | 0x1f827400 | 0x620000 | +| 7 | blk.0.attn_v.weight | 0x1fe47400 | 0x240000 | +| 8 | blk.0.ffn_down.weight | 0x20087400 | 0x1a90000 | +| 9 | blk.0.ffn_gate.weight | 0x21b17400 | 0x1324000 | +| 10 | blk.0.ffn_norm.weight | 0x22e3b400 | 0x4000 | +| 11 | blk.0.ffn_up.weight | 0x22e3f400 | 0x1c20000 | +| 12 | blk.1.attn_k.weight | 0x24a5f400 | 0x220000 | +| 13 | blk.1.attn_norm.weight | 0x24c7f400 | 0x4000 | +| 14 | blk.1.attn_output.weight | 0x24c83400 | 0xb00000 | +| 15 | blk.1.attn_q.weight | 0x25783400 | 0x900000 | +| 
16 | blk.1.attn_v.weight | 0x26083400 | 0x2c0000 | +| 17 | blk.1.ffn_down.weight | 0x26343400 | 0x1a90000 | +| 18 | blk.1.ffn_gate.weight | 0x27dd3400 | 0x1a90000 | +| 19 | blk.1.ffn_norm.weight | 0x29863400 | 0x4000 | +| 20 | blk.1.ffn_up.weight | 0x29867400 | 0x2260000 | +| 21 | blk.2.attn_k.weight | 0x2bac7400 | 0x220000 | +| 22 | blk.2.attn_norm.weight | 0x2bce7400 | 0x4000 | +| 23 | blk.2.attn_output.weight | 0x2bceb400 | 0xb00000 | +| 24 | blk.2.attn_q.weight | 0x2c7eb400 | 0x900000 | +| 25 | blk.2.attn_v.weight | 0x2d0eb400 | 0x2c0000 | +| 26 | blk.2.ffn_down.weight | 0x2d3ab400 | 0x1a90000 | +| 27 | blk.2.ffn_gate.weight | 0x2ee3b400 | 0x1a90000 | +| 28 | blk.2.ffn_norm.weight | 0x308cb400 | 0x4000 | +| 29 | blk.2.ffn_up.weight | 0x308cf400 | 0x2260000 | +| 30 | blk.3.attn_k.weight | 0x32b2f400 | 0x220000 | +| 31 | blk.3.attn_norm.weight | 0x32d4f400 | 0x4000 | +| 32 | blk.3.attn_output.weight | 0x32d53400 | 0xb00000 | +| 33 | blk.3.attn_q.weight | 0x33853400 | 0x900000 | +| 34 | blk.3.attn_v.weight | 0x34153400 | 0x2c0000 | +| 35 | blk.3.ffn_down.weight | 0x34413400 | 0x1c20000 | +| 36 | blk.3.ffn_gate.weight | 0x36033400 | 0x1a90000 | +| 37 | blk.3.ffn_norm.weight | 0x37ac3400 | 0x4000 | +| 38 | blk.3.ffn_up.weight | 0x37ac7400 | 0x1c20000 | +| 39 | blk.4.attn_k.weight | 0x396e7400 | 0x240000 | +| 40 | blk.4.attn_norm.weight | 0x39927400 | 0x4000 | +| 41 | blk.4.attn_output.weight | 0x3992b400 | 0xb00000 | +| 42 | blk.4.attn_q.weight | 0x3a42b400 | 0x900000 | +| 43 | blk.4.attn_v.weight | 0x3ad2b400 | 0x2c0000 | +| 44 | blk.4.ffn_down.weight | 0x3afeb400 | 0x1a90000 | +| 45 | blk.4.ffn_gate.weight | 0x3ca7b400 | 0x1a90000 | +| 46 | blk.4.ffn_norm.weight | 0x3e50b400 | 0x4000 | +| 47 | blk.4.ffn_up.weight | 0x3e50f400 | 0x1a90000 | +| 48 | blk.5.attn_k.weight | 0x3ff9f400 | 0x240000 | +| 49 | blk.5.attn_norm.weight | 0x401df400 | 0x4000 | +| 50 | blk.5.attn_output.weight | 0x401e3400 | 0xb00000 | +| 51 | blk.5.attn_q.weight | 0x40ce3400 | 0x900000 | +| 52 
| blk.5.attn_v.weight | 0x415e3400 | 0x2c0000 | +| 53 | blk.5.ffn_down.weight | 0x418a3400 | 0x1c20000 | +| 54 | blk.5.ffn_gate.weight | 0x434c3400 | 0x1a90000 | +| 55 | blk.5.ffn_norm.weight | 0x44f53400 | 0x4000 | +| 56 | blk.5.ffn_up.weight | 0x44f57400 | 0x1c20000 | +| 57 | blk.6.attn_k.weight | 0x46b77400 | 0x240000 | +| 58 | blk.6.attn_norm.weight | 0x46db7400 | 0x4000 | +| 59 | blk.6.attn_output.weight | 0x46dbb400 | 0xb00000 | +| 60 | blk.6.attn_q.weight | 0x478bb400 | 0x900000 | +| 61 | blk.6.attn_v.weight | 0x481bb400 | 0x2c0000 | +| 62 | blk.6.ffn_down.weight | 0x4847b400 | 0x1a90000 | +| 63 | blk.6.ffn_gate.weight | 0x49f0b400 | 0x1a90000 | +| 64 | blk.6.ffn_norm.weight | 0x4b99b400 | 0x4000 | +| 65 | blk.6.ffn_up.weight | 0x4b99f400 | 0x1a90000 | +| 66 | blk.7.attn_k.weight | 0x4d42f400 | 0x240000 | +| 67 | blk.7.attn_norm.weight | 0x4d66f400 | 0x4000 | +| 68 | blk.7.attn_output.weight | 0x4d673400 | 0xb00000 | +| 69 | blk.7.attn_q.weight | 0x4e173400 | 0x900000 | +| 70 | blk.7.attn_v.weight | 0x4ea73400 | 0x2c0000 | +| 71 | blk.7.ffn_down.weight | 0x4ed33400 | 0x1a90000 | +| 72 | blk.7.ffn_gate.weight | 0x507c3400 | 0x1a90000 | +| 73 | blk.7.ffn_norm.weight | 0x52253400 | 0x4000 | +| 74 | blk.7.ffn_up.weight | 0x52257400 | 0x1c20000 | +| 75 | blk.8.attn_k.weight | 0x53e77400 | 0x240000 | +| 76 | blk.8.attn_norm.weight | 0x540b7400 | 0x4000 | +| 77 | blk.8.attn_output.weight | 0x540bb400 | 0xb00000 | +| 78 | blk.8.attn_q.weight | 0x54bbb400 | 0x900000 | +| 79 | blk.8.attn_v.weight | 0x554bb400 | 0x2c0000 | +| 80 | blk.8.ffn_down.weight | 0x5577b400 | 0x1a90000 | +| 81 | blk.8.ffn_gate.weight | 0x5720b400 | 0x1a90000 | +| 82 | blk.8.ffn_norm.weight | 0x58c9b400 | 0x4000 | +| 83 | blk.8.ffn_up.weight | 0x58c9f400 | 0x1a90000 | +| 84 | blk.9.attn_k.weight | 0x5a72f400 | 0x240000 | +| 85 | blk.9.attn_norm.weight | 0x5a96f400 | 0x4000 | +| 86 | blk.9.attn_output.weight | 0x5a973400 | 0xb00000 | +| 87 | blk.9.attn_q.weight | 0x5b473400 | 0x900000 | +| 88 | 
blk.9.attn_v.weight | 0x5bd73400 | 0x2c0000 | +| 89 | blk.9.ffn_down.weight | 0x5c033400 | 0x1a90000 | +| 90 | blk.9.ffn_gate.weight | 0x5dac3400 | 0x1a90000 | +| 91 | blk.9.ffn_norm.weight | 0x5f553400 | 0x4000 | +| 92 | blk.9.ffn_up.weight | 0x5f557400 | 0x1a90000 | +| 93 | blk.10.attn_k.weight | 0x60fe7400 | 0x240000 | +| 94 | blk.10.attn_norm.weight | 0x61227400 | 0x4000 | +| 95 | blk.10.attn_output.weight | 0x6122b400 | 0xb00000 | +| 96 | blk.10.attn_q.weight | 0x61d2b400 | 0x900000 | +| 97 | blk.10.attn_v.weight | 0x6262b400 | 0x2c0000 | +| 98 | blk.10.ffn_down.weight | 0x628eb400 | 0x1a90000 | +| 99 | blk.10.ffn_gate.weight | 0x6437b400 | 0x1a90000 | +| 100 | blk.10.ffn_norm.weight | 0x65e0b400 | 0x4000 | +| 101 | blk.10.ffn_up.weight | 0x65e0f400 | 0x2260000 | +| 102 | blk.11.attn_k.weight | 0x6806f400 | 0x220000 | +| 103 | blk.11.attn_norm.weight | 0x6828f400 | 0x4000 | +| 104 | blk.11.attn_output.weight | 0x68293400 | 0xb00000 | +| 105 | blk.11.attn_q.weight | 0x68d93400 | 0x900000 | +| 106 | blk.11.attn_v.weight | 0x69693400 | 0x2c0000 | +| 107 | blk.11.ffn_down.weight | 0x69953400 | 0x1a90000 | +| 108 | blk.11.ffn_gate.weight | 0x6b3e3400 | 0x1a90000 | +| 109 | blk.11.ffn_norm.weight | 0x6ce73400 | 0x4000 | +| 110 | blk.11.ffn_up.weight | 0x6ce77400 | 0x1a90000 | +| 111 | blk.12.attn_k.weight | 0x6e907400 | 0x240000 | +| 112 | blk.12.attn_norm.weight | 0x6eb47400 | 0x4000 | +| 113 | blk.12.attn_output.weight | 0x6eb4b400 | 0xb00000 | +| 114 | blk.12.attn_q.weight | 0x6f64b400 | 0x900000 | +| 115 | blk.12.attn_v.weight | 0x6ff4b400 | 0x2c0000 | +| 116 | blk.12.ffn_down.weight | 0x7020b400 | 0x1a90000 | +| 117 | blk.12.ffn_gate.weight | 0x71c9b400 | 0x1a90000 | +| 118 | blk.12.ffn_norm.weight | 0x7372b400 | 0x4000 | +| 119 | blk.12.ffn_up.weight | 0x7372f400 | 0x1a90000 | +| 120 | blk.13.attn_k.weight | 0x751bf400 | 0x240000 | +| 121 | blk.13.attn_norm.weight | 0x753ff400 | 0x4000 | +| 122 | blk.13.attn_output.weight | 0x75403400 | 0xb00000 | +| 123 | 
blk.13.attn_q.weight | 0x75f03400 | 0x900000 | +| 124 | blk.13.attn_v.weight | 0x76803400 | 0x2c0000 | +| 125 | blk.13.ffn_down.weight | 0x76ac3400 | 0x1a90000 | +| 126 | blk.13.ffn_gate.weight | 0x78553400 | 0x1a90000 | +| 127 | blk.13.ffn_norm.weight | 0x79fe3400 | 0x4000 | +| 128 | blk.13.ffn_up.weight | 0x79fe7400 | 0x1a90000 | +| 129 | blk.14.attn_k.weight | 0x7ba77400 | 0x240000 | +| 130 | blk.14.attn_norm.weight | 0x7bcb7400 | 0x4000 | +| 131 | blk.14.attn_output.weight | 0x7bcbb400 | 0xb00000 | +| 132 | blk.14.attn_q.weight | 0x7c7bb400 | 0x900000 | +| 133 | blk.14.attn_v.weight | 0x7d0bb400 | 0x2c0000 | +| 134 | blk.14.ffn_down.weight | 0x7d37b400 | 0x1a90000 | +| 135 | blk.14.ffn_gate.weight | 0x7ee0b400 | 0x1a90000 | +| 136 | blk.14.ffn_norm.weight | 0x8089b400 | 0x4000 | +| 137 | blk.14.ffn_up.weight | 0x8089f400 | 0x1a90000 | +| 138 | blk.15.attn_k.weight | 0x8232f400 | 0x240000 | +| 139 | blk.15.attn_norm.weight | 0x8256f400 | 0x4000 | +| 140 | blk.15.attn_output.weight | 0x82573400 | 0xb00000 | +| 141 | blk.15.attn_q.weight | 0x83073400 | 0x900000 | +| 142 | blk.15.attn_v.weight | 0x83973400 | 0x2c0000 | +| 143 | blk.15.ffn_down.weight | 0x83c33400 | 0x1a90000 | +| 144 | blk.15.ffn_gate.weight | 0x856c3400 | 0x1a90000 | +| 145 | blk.15.ffn_norm.weight | 0x87153400 | 0x4000 | +| 146 | blk.15.ffn_up.weight | 0x87157400 | 0x1a90000 | +| 147 | blk.16.attn_k.weight | 0x88be7400 | 0x240000 | +| 148 | blk.16.attn_norm.weight | 0x88e27400 | 0x4000 | +| 149 | blk.16.attn_output.weight | 0x88e2b400 | 0xb00000 | +| 150 | blk.16.attn_q.weight | 0x8992b400 | 0x900000 | +| 151 | blk.16.attn_v.weight | 0x8a22b400 | 0x2c0000 | +| 152 | blk.16.ffn_down.weight | 0x8a4eb400 | 0x1a90000 | +| 153 | blk.16.ffn_gate.weight | 0x8bf7b400 | 0x1a90000 | +| 154 | blk.16.ffn_norm.weight | 0x8da0b400 | 0x4000 | +| 155 | blk.16.ffn_up.weight | 0x8da0f400 | 0x1a90000 | +| 156 | blk.17.attn_k.weight | 0x8f49f400 | 0x240000 | +| 157 | blk.17.attn_norm.weight | 0x8f6df400 | 0x4000 | 
+| 158 | blk.17.attn_output.weight | 0x8f6e3400 | 0xb00000 | +| 159 | blk.17.attn_q.weight | 0x901e3400 | 0x900000 | +| 160 | blk.17.attn_v.weight | 0x90ae3400 | 0x2c0000 | +| 161 | blk.17.ffn_down.weight | 0x90da3400 | 0x1a90000 | +| 162 | blk.17.ffn_gate.weight | 0x92833400 | 0x1a90000 | +| 163 | blk.17.ffn_norm.weight | 0x942c3400 | 0x4000 | +| 164 | blk.17.ffn_up.weight | 0x942c7400 | 0x1a90000 | +| 165 | blk.18.attn_k.weight | 0x95d57400 | 0x220000 | +| 166 | blk.18.attn_norm.weight | 0x95f77400 | 0x4000 | +| 167 | blk.18.attn_output.weight | 0x95f7b400 | 0xb00000 | +| 168 | blk.18.attn_q.weight | 0x96a7b400 | 0x900000 | +| 169 | blk.18.attn_v.weight | 0x9737b400 | 0x2c0000 | +| 170 | blk.18.ffn_down.weight | 0x9763b400 | 0x1a90000 | +| 171 | blk.18.ffn_gate.weight | 0x990cb400 | 0x1a90000 | +| 172 | blk.18.ffn_norm.weight | 0x9ab5b400 | 0x4000 | +| 173 | blk.18.ffn_up.weight | 0x9ab5f400 | 0x1a90000 | +| 174 | blk.19.attn_k.weight | 0x9c5ef400 | 0x240000 | +| 175 | blk.19.attn_norm.weight | 0x9c82f400 | 0x4000 | +| 176 | blk.19.attn_output.weight | 0x9c833400 | 0xb00000 | +| 177 | blk.19.attn_q.weight | 0x9d333400 | 0x900000 | +| 178 | blk.19.attn_v.weight | 0x9dc33400 | 0x2c0000 | +| 179 | blk.19.ffn_down.weight | 0x9def3400 | 0x1a90000 | +| 180 | blk.19.ffn_gate.weight | 0x9f983400 | 0x1a90000 | +| 181 | blk.19.ffn_norm.weight | 0xa1413400 | 0x4000 | +| 182 | blk.19.ffn_up.weight | 0xa1417400 | 0x1c20000 | +| 183 | blk.20.attn_k.weight | 0xa3037400 | 0x240000 | +| 184 | blk.20.attn_norm.weight | 0xa3277400 | 0x4000 | +| 185 | blk.20.attn_output.weight | 0xa327b400 | 0xb00000 | +| 186 | blk.20.attn_q.weight | 0xa3d7b400 | 0x900000 | +| 187 | blk.20.attn_v.weight | 0xa467b400 | 0x2c0000 | +| 188 | blk.20.ffn_down.weight | 0xa493b400 | 0x1a90000 | +| 189 | blk.20.ffn_gate.weight | 0xa63cb400 | 0x1a90000 | +| 190 | blk.20.ffn_norm.weight | 0xa7e5b400 | 0x4000 | +| 191 | blk.20.ffn_up.weight | 0xa7e5f400 | 0x1a90000 | +| 192 | blk.21.attn_k.weight | 0xa98ef400 | 
0x220000 | +| 193 | blk.21.attn_norm.weight | 0xa9b0f400 | 0x4000 | +| 194 | blk.21.attn_output.weight | 0xa9b13400 | 0xb00000 | +| 195 | blk.21.attn_q.weight | 0xaa613400 | 0x900000 | +| 196 | blk.21.attn_v.weight | 0xaaf13400 | 0x2c0000 | +| 197 | blk.21.ffn_down.weight | 0xab1d3400 | 0x1a90000 | +| 198 | blk.21.ffn_gate.weight | 0xacc63400 | 0x1a90000 | +| 199 | blk.21.ffn_norm.weight | 0xae6f3400 | 0x4000 | +| 200 | blk.21.ffn_up.weight | 0xae6f7400 | 0x1a90000 | +| 201 | blk.22.attn_k.weight | 0xb0187400 | 0x240000 | +| 202 | blk.22.attn_norm.weight | 0xb03c7400 | 0x4000 | +| 203 | blk.22.attn_output.weight | 0xb03cb400 | 0xb00000 | +| 204 | blk.22.attn_q.weight | 0xb0ecb400 | 0x900000 | +| 205 | blk.22.attn_v.weight | 0xb17cb400 | 0x2c0000 | +| 206 | blk.22.ffn_down.weight | 0xb1a8b400 | 0x1a90000 | +| 207 | blk.22.ffn_gate.weight | 0xb351b400 | 0x1a90000 | +| 208 | blk.22.ffn_norm.weight | 0xb4fab400 | 0x4000 | +| 209 | blk.22.ffn_up.weight | 0xb4faf400 | 0x1a90000 | +| 210 | blk.23.attn_k.weight | 0xb6a3f400 | 0x240000 | +| 211 | blk.23.attn_norm.weight | 0xb6c7f400 | 0x4000 | +| 212 | blk.23.attn_output.weight | 0xb6c83400 | 0xb00000 | +| 213 | blk.23.attn_q.weight | 0xb7783400 | 0x900000 | +| 214 | blk.23.attn_v.weight | 0xb8083400 | 0x2c0000 | +| 215 | blk.23.ffn_down.weight | 0xb8343400 | 0x1a90000 | +| 216 | blk.23.ffn_gate.weight | 0xb9dd3400 | 0x1a90000 | +| 217 | blk.23.ffn_norm.weight | 0xbb863400 | 0x4000 | +| 218 | blk.23.ffn_up.weight | 0xbb867400 | 0x1c20000 | +| 219 | blk.24.attn_k.weight | 0xbd487400 | 0x220000 | +| 220 | blk.24.attn_norm.weight | 0xbd6a7400 | 0x4000 | +| 221 | blk.24.attn_output.weight | 0xbd6ab400 | 0xb00000 | +| 222 | blk.24.attn_q.weight | 0xbe1ab400 | 0x900000 | +| 223 | blk.24.attn_v.weight | 0xbeaab400 | 0x2c0000 | +| 224 | blk.24.ffn_down.weight | 0xbed6b400 | 0x1a90000 | +| 225 | blk.24.ffn_gate.weight | 0xc07fb400 | 0x1a90000 | +| 226 | blk.24.ffn_norm.weight | 0xc228b400 | 0x4000 | +| 227 | blk.24.ffn_up.weight | 
0xc228f400 | 0x1c20000 | +| 228 | blk.25.attn_k.weight | 0xc3eaf400 | 0x240000 | +| 229 | blk.25.attn_norm.weight | 0xc40ef400 | 0x4000 | +| 230 | blk.25.attn_output.weight | 0xc40f3400 | 0xb00000 | +| 231 | blk.25.attn_q.weight | 0xc4bf3400 | 0x900000 | +| 232 | blk.25.attn_v.weight | 0xc54f3400 | 0x2c0000 | +| 233 | blk.25.ffn_down.weight | 0xc57b3400 | 0x1a90000 | +| 234 | blk.25.ffn_gate.weight | 0xc7243400 | 0x1a90000 | +| 235 | blk.25.ffn_norm.weight | 0xc8cd3400 | 0x4000 | +| 236 | blk.25.ffn_up.weight | 0xc8cd7400 | 0x1c20000 | +| 237 | blk.26.attn_k.weight | 0xca8f7400 | 0x240000 | +| 238 | blk.26.attn_norm.weight | 0xcab37400 | 0x4000 | +| 239 | blk.26.attn_output.weight | 0xcab3b400 | 0xb00000 | +| 240 | blk.26.attn_q.weight | 0xcb63b400 | 0x900000 | +| 241 | blk.26.attn_v.weight | 0xcbf3b400 | 0x2c0000 | +| 242 | blk.26.ffn_down.weight | 0xcc1fb400 | 0x1a90000 | +| 243 | blk.26.ffn_gate.weight | 0xcdc8b400 | 0x1a90000 | +| 244 | blk.26.ffn_norm.weight | 0xcf71b400 | 0x4000 | +| 245 | blk.26.ffn_up.weight | 0xcf71f400 | 0x1a90000 | +| 246 | blk.27.attn_k.weight | 0xd11af400 | 0x240000 | +| 247 | blk.27.attn_norm.weight | 0xd13ef400 | 0x4000 | +| 248 | blk.27.attn_output.weight | 0xd13f3400 | 0xb00000 | +| 249 | blk.27.attn_q.weight | 0xd1ef3400 | 0x900000 | +| 250 | blk.27.attn_v.weight | 0xd27f3400 | 0x2c0000 | +| 251 | blk.27.ffn_down.weight | 0xd2ab3400 | 0x1a90000 | +| 252 | blk.27.ffn_gate.weight | 0xd4543400 | 0x1a90000 | +| 253 | blk.27.ffn_norm.weight | 0xd5fd3400 | 0x4000 | +| 254 | blk.27.ffn_up.weight | 0xd5fd7400 | 0x1a90000 | +| 255 | blk.28.attn_k.weight | 0xd7a67400 | 0x240000 | +| 256 | blk.28.attn_norm.weight | 0xd7ca7400 | 0x4000 | +| 257 | blk.28.attn_output.weight | 0xd7cab400 | 0xb00000 | +| 258 | blk.28.attn_q.weight | 0xd87ab400 | 0x900000 | +| 259 | blk.28.attn_v.weight | 0xd90ab400 | 0x2c0000 | +| 260 | blk.28.ffn_down.weight | 0xd936b400 | 0x1a90000 | +| 261 | blk.28.ffn_gate.weight | 0xdadfb400 | 0x1a90000 | +| 262 | 
blk.28.ffn_norm.weight | 0xdc88b400 | 0x4000 | +| 263 | blk.28.ffn_up.weight | 0xdc88f400 | 0x1c20000 | +| 264 | blk.29.attn_k.weight | 0xde4af400 | 0x240000 | +| 265 | blk.29.attn_norm.weight | 0xde6ef400 | 0x4000 | +| 266 | blk.29.attn_output.weight | 0xde6f3400 | 0xb00000 | +| 267 | blk.29.attn_q.weight | 0xdf1f3400 | 0x900000 | +| 268 | blk.29.attn_v.weight | 0xdfaf3400 | 0x2c0000 | +| 269 | blk.29.ffn_down.weight | 0xdfdb3400 | 0x1a90000 | +| 270 | blk.29.ffn_gate.weight | 0xe1843400 | 0x1a90000 | +| 271 | blk.29.ffn_norm.weight | 0xe32d3400 | 0x4000 | +| 272 | blk.29.ffn_up.weight | 0xe32d7400 | 0x1c20000 | +| 273 | blk.30.attn_k.weight | 0xe4ef7400 | 0x240000 | +| 274 | blk.30.attn_norm.weight | 0xe5137400 | 0x4000 | +| 275 | blk.30.attn_output.weight | 0xe513b400 | 0xb00000 | +| 276 | blk.30.attn_q.weight | 0xe5c3b400 | 0x900000 | +| 277 | blk.30.attn_v.weight | 0xe653b400 | 0x2c0000 | +| 278 | blk.30.ffn_down.weight | 0xe67fb400 | 0x1a90000 | +| 279 | blk.30.ffn_gate.weight | 0xe828b400 | 0x1a90000 | +| 280 | blk.30.ffn_norm.weight | 0xe9d1b400 | 0x4000 | +| 281 | blk.30.ffn_up.weight | 0xe9d1f400 | 0x1a90000 | +| 282 | blk.31.attn_k.weight | 0xeb7af400 | 0x240000 | +| 283 | blk.31.attn_norm.weight | 0xeb9ef400 | 0x4000 | +| 284 | blk.31.attn_output.weight | 0xeb9f3400 | 0xb00000 | +| 285 | blk.31.attn_q.weight | 0xec4f3400 | 0x900000 | +| 286 | blk.31.attn_v.weight | 0xecdf3400 | 0x2c0000 | +| 287 | blk.31.ffn_down.weight | 0xed0b3400 | 0x1a90000 | +| 288 | blk.31.ffn_gate.weight | 0xeeb43400 | 0x1a90000 | +| 289 | blk.31.ffn_norm.weight | 0xf05d3400 | 0x4000 | +| 290 | blk.31.ffn_up.weight | 0xf05d7400 | 0x1c20000 | +| 291 | blk.32.attn_k.weight | 0xf21f7400 | 0x240000 | +| 292 | blk.32.attn_norm.weight | 0xf2437400 | 0x4000 | +| 293 | blk.32.attn_output.weight | 0xf243b400 | 0xb00000 | +| 294 | blk.32.attn_q.weight | 0xf2f3b400 | 0x900000 | +| 295 | blk.32.attn_v.weight | 0xf383b400 | 0x2c0000 | +| 296 | blk.32.ffn_down.weight | 0xf3afb400 | 0x1a90000 | 
+| 297 | blk.32.ffn_gate.weight | 0xf558b400 | 0x1a90000 | +| 298 | blk.32.ffn_norm.weight | 0xf701b400 | 0x4000 | +| 299 | blk.32.ffn_up.weight | 0xf701f400 | 0x1c20000 | +| 300 | blk.33.attn_k.weight | 0xf8c3f400 | 0x240000 | +| 301 | blk.33.attn_norm.weight | 0xf8e7f400 | 0x4000 | +| 302 | blk.33.attn_output.weight | 0xf8e83400 | 0xb00000 | +| 303 | blk.33.attn_q.weight | 0xf9983400 | 0x900000 | +| 304 | blk.33.attn_v.weight | 0xfa283400 | 0x2c0000 | +| 305 | blk.33.ffn_down.weight | 0xfa543400 | 0x1a90000 | +| 306 | blk.33.ffn_gate.weight | 0xfbfd3400 | 0x1a90000 | +| 307 | blk.33.ffn_norm.weight | 0xfda63400 | 0x4000 | +| 308 | blk.33.ffn_up.weight | 0xfda67400 | 0x1c20000 | +| 309 | blk.34.attn_k.weight | 0xff687400 | 0x240000 | +| 310 | blk.34.attn_norm.weight | 0xff8c7400 | 0x4000 | +| 311 | blk.34.attn_output.weight | 0xff8cb400 | 0xb00000 | +| 312 | blk.34.attn_q.weight | 0x1003cb400 | 0x900000 | +| 313 | blk.34.attn_v.weight | 0x100ccb400 | 0x2c0000 | +| 314 | blk.34.ffn_down.weight | 0x100f8b400 | 0x1a90000 | +| 315 | blk.34.ffn_gate.weight | 0x102a1b400 | 0x1a90000 | +| 316 | blk.34.ffn_norm.weight | 0x1044ab400 | 0x4000 | +| 317 | blk.34.ffn_up.weight | 0x1044af400 | 0x1c20000 | +| 318 | blk.35.attn_k.weight | 0x1060cf400 | 0x220000 | +| 319 | blk.35.attn_norm.weight | 0x1062ef400 | 0x4000 | +| 320 | blk.35.attn_output.weight | 0x1062f3400 | 0xb00000 | +| 321 | blk.35.attn_q.weight | 0x106df3400 | 0x900000 | +| 322 | blk.35.attn_v.weight | 0x1076f3400 | 0x240000 | +| 323 | blk.35.ffn_down.weight | 0x107933400 | 0x1a90000 | +| 324 | blk.35.ffn_gate.weight | 0x1093c3400 | 0x1a90000 | +| 325 | blk.35.ffn_norm.weight | 0x10ae53400 | 0x4000 | +| 326 | blk.35.ffn_up.weight | 0x10ae57400 | 0x1a90000 | +| 327 | blk.36.attn_k.weight | 0x10c8e7400 | 0x220000 | +| 328 | blk.36.attn_norm.weight | 0x10cb07400 | 0x4000 | +| 329 | blk.36.attn_output.weight | 0x10cb0b400 | 0xb00000 | +| 330 | blk.36.attn_q.weight | 0x10d60b400 | 0x900000 | +| 331 | 
blk.36.attn_v.weight | 0x10df0b400 | 0x240000 | +| 332 | blk.36.ffn_down.weight | 0x10e14b400 | 0x1a90000 | +| 333 | blk.36.ffn_gate.weight | 0x10fbdb400 | 0x1a90000 | +| 334 | blk.36.ffn_norm.weight | 0x11166b400 | 0x4000 | +| 335 | blk.36.ffn_up.weight | 0x11166f400 | 0x1a90000 | +| 336 | blk.37.attn_k.weight | 0x1130ff400 | 0x220000 | +| 337 | blk.37.attn_norm.weight | 0x11331f400 | 0x4000 | +| 338 | blk.37.attn_output.weight | 0x113323400 | 0xb00000 | +| 339 | blk.37.attn_q.weight | 0x113e23400 | 0x900000 | +| 340 | blk.37.attn_v.weight | 0x114723400 | 0x2c0000 | +| 341 | blk.37.ffn_down.weight | 0x1149e3400 | 0x1a90000 | +| 342 | blk.37.ffn_gate.weight | 0x116473400 | 0x1a90000 | +| 343 | blk.37.ffn_norm.weight | 0x117f03400 | 0x4000 | +| 344 | blk.37.ffn_up.weight | 0x117f07400 | 0x1a90000 | +| 345 | blk.38.attn_k.weight | 0x119997400 | 0x220000 | +| 346 | blk.38.attn_norm.weight | 0x119bb7400 | 0x4000 | +| 347 | blk.38.attn_output.weight | 0x119bbb400 | 0xb00000 | +| 348 | blk.38.attn_q.weight | 0x11a6bb400 | 0x880000 | +| 349 | blk.38.attn_v.weight | 0x11af3b400 | 0x240000 | +| 350 | blk.38.ffn_down.weight | 0x11b17b400 | 0x1a90000 | +| 351 | blk.38.ffn_gate.weight | 0x11cc0b400 | 0x1a90000 | +| 352 | blk.38.ffn_norm.weight | 0x11e69b400 | 0x4000 | +| 353 | blk.38.ffn_up.weight | 0x11e69f400 | 0x1c20000 | +| 354 | blk.39.attn_k.weight | 0x1202bf400 | 0x220000 | +| 355 | blk.39.attn_norm.weight | 0x1204df400 | 0x4000 | +| 356 | blk.39.attn_output.weight | 0x1204e3400 | 0xb00000 | +| 357 | blk.39.attn_q.weight | 0x120fe3400 | 0x880000 | +| 358 | blk.39.attn_v.weight | 0x121863400 | 0x2c0000 | +| 359 | blk.39.ffn_down.weight | 0x121b23400 | 0x1c20000 | +| 360 | blk.39.ffn_gate.weight | 0x123743400 | 0x1c20000 | +| 361 | blk.39.ffn_norm.weight | 0x125363400 | 0x4000 | +| 362 | blk.39.ffn_up.weight | 0x125367400 | 0x1c20000 | + +### Base Tensor Group : ~822M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW 
| +|-----:|:-------------------|:---------------------------------|:------------------|:----------------------|:-----|--------:| +| 0 | output.weight | Output (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q5_K | 5.5000 | +| 1 | output_norm.weight | Output Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 2 | token_embd.weight | Token Embedding (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in base: (~822M) 822087680 +- Percentage of total elements: 9.35% +- Bits per Weight (BPW) for base: 5.0001 bits + + +### Block 0 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:--------|--------:| +| 3 | blk.0.attn_k.weight | Block 0 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ3_S | 3.4375 | +| 4 | blk.0.attn_norm.weight | Block 0 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 5 | blk.0.attn_output.weight | Block 0 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 6 | blk.0.attn_q.weight | Block 0 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ3_XXS | 3.0625 | +| 7 | blk.0.attn_v.weight | Block 0 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 8 | blk.0.ffn_down.weight | Block 0 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 9 | blk.0.ffn_gate.weight | Block 0 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ3_XXS | 3.0625 | +| 10 | blk.0.ffn_norm.weight | Block 0 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 11 | blk.0.ffn_up.weight | Block 0 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in 
blk.0: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.0: 3.9137 bits + + +### Block 1 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 12 | blk.1.attn_k.weight | Block 1 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 13 | blk.1.attn_norm.weight | Block 1 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 14 | blk.1.attn_output.weight | Block 1 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 15 | blk.1.attn_q.weight | Block 1 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 16 | blk.1.attn_v.weight | Block 1 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 17 | blk.1.ffn_down.weight | Block 1 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 18 | blk.1.ffn_gate.weight | Block 1 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 19 | blk.1.ffn_norm.weight | Block 1 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 20 | blk.1.ffn_up.weight | Block 1 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.1: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.1: 4.7327 bits + + +### Block 2 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 21 | blk.2.attn_k.weight | Block 2 Attention Key (W) 
| ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 22 | blk.2.attn_norm.weight | Block 2 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 23 | blk.2.attn_output.weight | Block 2 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 24 | blk.2.attn_q.weight | Block 2 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 25 | blk.2.attn_v.weight | Block 2 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 26 | blk.2.ffn_down.weight | Block 2 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 27 | blk.2.ffn_gate.weight | Block 2 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 28 | blk.2.ffn_norm.weight | Block 2 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 29 | blk.2.ffn_up.weight | Block 2 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.2: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.2: 4.7327 bits + + +### Block 3 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 30 | blk.3.attn_k.weight | Block 3 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 31 | blk.3.attn_norm.weight | Block 3 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 32 | blk.3.attn_output.weight | Block 3 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 33 | blk.3.attn_q.weight | Block 3 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 34 | blk.3.attn_v.weight | Block 3 
Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 35 | blk.3.ffn_down.weight | Block 3 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 36 | blk.3.ffn_gate.weight | Block 3 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 37 | blk.3.ffn_norm.weight | Block 3 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 38 | blk.3.ffn_up.weight | Block 3 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.3: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.3: 4.5353 bits + + +### Block 4 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 39 | blk.4.attn_k.weight | Block 4 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 40 | blk.4.attn_norm.weight | Block 4 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 41 | blk.4.attn_output.weight | Block 4 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 42 | blk.4.attn_q.weight | Block 4 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 43 | blk.4.attn_v.weight | Block 4 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 44 | blk.4.ffn_down.weight | Block 4 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 45 | blk.4.ffn_gate.weight | Block 4 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 46 | blk.4.ffn_norm.weight | Block 4 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 
32.0000 | +| 47 | blk.4.ffn_up.weight | Block 4 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.4: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.4: 4.4090 bits + + +### Block 5 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 48 | blk.5.attn_k.weight | Block 5 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 49 | blk.5.attn_norm.weight | Block 5 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 50 | blk.5.attn_output.weight | Block 5 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 51 | blk.5.attn_q.weight | Block 5 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 52 | blk.5.attn_v.weight | Block 5 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 53 | blk.5.ffn_down.weight | Block 5 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 54 | blk.5.ffn_gate.weight | Block 5 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 55 | blk.5.ffn_norm.weight | Block 5 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 56 | blk.5.ffn_up.weight | Block 5 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.5: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.5: 4.5406 bits + + +### Block 6 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 57 | blk.6.attn_k.weight | Block 6 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 58 | blk.6.attn_norm.weight | Block 6 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 59 | blk.6.attn_output.weight | Block 6 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 60 | blk.6.attn_q.weight | Block 6 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 61 | blk.6.attn_v.weight | Block 6 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 62 | blk.6.ffn_down.weight | Block 6 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 63 | blk.6.ffn_gate.weight | Block 6 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 64 | blk.6.ffn_norm.weight | Block 6 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 65 | blk.6.ffn_up.weight | Block 6 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.6: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.6: 4.4090 bits + + +### Block 7 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 66 | blk.7.attn_k.weight | Block 7 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 67 | blk.7.attn_norm.weight | Block 7 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 68 | blk.7.attn_output.weight | Block 7 Attention Output (W) | (~17M) 
16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 69 | blk.7.attn_q.weight | Block 7 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 70 | blk.7.attn_v.weight | Block 7 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 71 | blk.7.ffn_down.weight | Block 7 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 72 | blk.7.ffn_gate.weight | Block 7 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 73 | blk.7.ffn_norm.weight | Block 7 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 74 | blk.7.ffn_up.weight | Block 7 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.7: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.7: 4.4748 bits + + +### Block 8 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 75 | blk.8.attn_k.weight | Block 8 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 76 | blk.8.attn_norm.weight | Block 8 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 77 | blk.8.attn_output.weight | Block 8 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 78 | blk.8.attn_q.weight | Block 8 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 79 | blk.8.attn_v.weight | Block 8 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 80 | blk.8.ffn_down.weight | Block 8 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 81 | blk.8.ffn_gate.weight | Block 8 
Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 82 | blk.8.ffn_norm.weight | Block 8 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 83 | blk.8.ffn_up.weight | Block 8 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.8: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.8: 4.4090 bits + + +### Block 9 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 84 | blk.9.attn_k.weight | Block 9 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 85 | blk.9.attn_norm.weight | Block 9 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 86 | blk.9.attn_output.weight | Block 9 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 87 | blk.9.attn_q.weight | Block 9 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 88 | blk.9.attn_v.weight | Block 9 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 89 | blk.9.ffn_down.weight | Block 9 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 90 | blk.9.ffn_gate.weight | Block 9 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 91 | blk.9.ffn_norm.weight | Block 9 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 92 | blk.9.ffn_up.weight | Block 9 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.9: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight 
(BPW) for blk.9: 4.4090 bits + + +### Block 10 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 93 | blk.10.attn_k.weight | Block 10 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 94 | blk.10.attn_norm.weight | Block 10 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 95 | blk.10.attn_output.weight | Block 10 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 96 | blk.10.attn_q.weight | Block 10 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 97 | blk.10.attn_v.weight | Block 10 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 98 | blk.10.ffn_down.weight | Block 10 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 99 | blk.10.ffn_gate.weight | Block 10 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 100 | blk.10.ffn_norm.weight | Block 10 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 101 | blk.10.ffn_up.weight | Block 10 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.10: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.10: 4.7380 bits + + +### Block 11 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 102 | blk.11.attn_k.weight | Block 11 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 
4.2500 | +| 103 | blk.11.attn_norm.weight | Block 11 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 104 | blk.11.attn_output.weight | Block 11 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 105 | blk.11.attn_q.weight | Block 11 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 106 | blk.11.attn_v.weight | Block 11 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 107 | blk.11.ffn_down.weight | Block 11 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 108 | blk.11.ffn_gate.weight | Block 11 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 109 | blk.11.ffn_norm.weight | Block 11 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 110 | blk.11.ffn_up.weight | Block 11 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.11: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.11: 4.4038 bits + + +### Block 12 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 111 | blk.12.attn_k.weight | Block 12 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 112 | blk.12.attn_norm.weight | Block 12 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 113 | blk.12.attn_output.weight | Block 12 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 114 | blk.12.attn_q.weight | Block 12 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 115 | blk.12.attn_v.weight | Block 12 Attention 
Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 116 | blk.12.ffn_down.weight | Block 12 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 117 | blk.12.ffn_gate.weight | Block 12 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 118 | blk.12.ffn_norm.weight | Block 12 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 119 | blk.12.ffn_up.weight | Block 12 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.12: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.12: 4.4090 bits + + +### Block 13 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 120 | blk.13.attn_k.weight | Block 13 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 121 | blk.13.attn_norm.weight | Block 13 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 122 | blk.13.attn_output.weight | Block 13 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 123 | blk.13.attn_q.weight | Block 13 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 124 | blk.13.attn_v.weight | Block 13 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 125 | blk.13.ffn_down.weight | Block 13 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 126 | blk.13.ffn_gate.weight | Block 13 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 127 | blk.13.ffn_norm.weight | Block 13 Feed-Forward Network Normalization (W) | ( 
~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 128 | blk.13.ffn_up.weight | Block 13 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.13: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.13: 4.4090 bits + + +### Block 14 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 129 | blk.14.attn_k.weight | Block 14 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 130 | blk.14.attn_norm.weight | Block 14 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 131 | blk.14.attn_output.weight | Block 14 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 132 | blk.14.attn_q.weight | Block 14 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 133 | blk.14.attn_v.weight | Block 14 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 134 | blk.14.ffn_down.weight | Block 14 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 135 | blk.14.ffn_gate.weight | Block 14 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 136 | blk.14.ffn_norm.weight | Block 14 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 137 | blk.14.ffn_up.weight | Block 14 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.14: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.14: 4.4090 bits + + +### Block 15 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name 
| Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 138 | blk.15.attn_k.weight | Block 15 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 139 | blk.15.attn_norm.weight | Block 15 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 140 | blk.15.attn_output.weight | Block 15 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 141 | blk.15.attn_q.weight | Block 15 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 142 | blk.15.attn_v.weight | Block 15 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 143 | blk.15.ffn_down.weight | Block 15 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 144 | blk.15.ffn_gate.weight | Block 15 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 145 | blk.15.ffn_norm.weight | Block 15 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 146 | blk.15.ffn_up.weight | Block 15 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.15: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.15: 4.4090 bits + + +### Block 16 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 147 | blk.16.attn_k.weight | Block 16 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 148 | blk.16.attn_norm.weight | Block 16 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 149 
| blk.16.attn_output.weight | Block 16 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 150 | blk.16.attn_q.weight | Block 16 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 151 | blk.16.attn_v.weight | Block 16 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 152 | blk.16.ffn_down.weight | Block 16 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 153 | blk.16.ffn_gate.weight | Block 16 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 154 | blk.16.ffn_norm.weight | Block 16 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 155 | blk.16.ffn_up.weight | Block 16 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.16: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.16: 4.4090 bits + + +### Block 17 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 156 | blk.17.attn_k.weight | Block 17 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 157 | blk.17.attn_norm.weight | Block 17 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 158 | blk.17.attn_output.weight | Block 17 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 159 | blk.17.attn_q.weight | Block 17 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 160 | blk.17.attn_v.weight | Block 17 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 161 | blk.17.ffn_down.weight | Block 17 Feed-Forward Network "Down" 
(W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 162 | blk.17.ffn_gate.weight | Block 17 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 163 | blk.17.ffn_norm.weight | Block 17 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 164 | blk.17.ffn_up.weight | Block 17 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.17: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.17: 4.4090 bits + + +### Block 18 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 165 | blk.18.attn_k.weight | Block 18 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 166 | blk.18.attn_norm.weight | Block 18 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 167 | blk.18.attn_output.weight | Block 18 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 168 | blk.18.attn_q.weight | Block 18 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 169 | blk.18.attn_v.weight | Block 18 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 170 | blk.18.ffn_down.weight | Block 18 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 171 | blk.18.ffn_gate.weight | Block 18 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 172 | blk.18.ffn_norm.weight | Block 18 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 173 | blk.18.ffn_up.weight | Block 18 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 
4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.18: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.18: 4.4038 bits + + +### Block 19 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 174 | blk.19.attn_k.weight | Block 19 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 175 | blk.19.attn_norm.weight | Block 19 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 176 | blk.19.attn_output.weight | Block 19 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 177 | blk.19.attn_q.weight | Block 19 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 178 | blk.19.attn_v.weight | Block 19 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 179 | blk.19.ffn_down.weight | Block 19 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 180 | blk.19.ffn_gate.weight | Block 19 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 181 | blk.19.ffn_norm.weight | Block 19 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 182 | blk.19.ffn_up.weight | Block 19 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.19: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.19: 4.4748 bits + + +### Block 20 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 183 | blk.20.attn_k.weight | Block 20 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 184 | blk.20.attn_norm.weight | Block 20 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 185 | blk.20.attn_output.weight | Block 20 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 186 | blk.20.attn_q.weight | Block 20 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 187 | blk.20.attn_v.weight | Block 20 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 188 | blk.20.ffn_down.weight | Block 20 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 189 | blk.20.ffn_gate.weight | Block 20 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 190 | blk.20.ffn_norm.weight | Block 20 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 191 | blk.20.ffn_up.weight | Block 20 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.20: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.20: 4.4090 bits + + +### Block 21 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 192 | blk.21.attn_k.weight | Block 21 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 193 | blk.21.attn_norm.weight | Block 21 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 194 | blk.21.attn_output.weight | 
Block 21 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 195 | blk.21.attn_q.weight | Block 21 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 196 | blk.21.attn_v.weight | Block 21 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 197 | blk.21.ffn_down.weight | Block 21 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 198 | blk.21.ffn_gate.weight | Block 21 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 199 | blk.21.ffn_norm.weight | Block 21 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 200 | blk.21.ffn_up.weight | Block 21 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.21: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.21: 4.4038 bits + + +### Block 22 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 201 | blk.22.attn_k.weight | Block 22 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 202 | blk.22.attn_norm.weight | Block 22 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 203 | blk.22.attn_output.weight | Block 22 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 204 | blk.22.attn_q.weight | Block 22 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 205 | blk.22.attn_v.weight | Block 22 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 206 | blk.22.ffn_down.weight | Block 22 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 
4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 207 | blk.22.ffn_gate.weight | Block 22 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 208 | blk.22.ffn_norm.weight | Block 22 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 209 | blk.22.ffn_up.weight | Block 22 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.22: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.22: 4.4090 bits + + +### Block 23 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 210 | blk.23.attn_k.weight | Block 23 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 211 | blk.23.attn_norm.weight | Block 23 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 212 | blk.23.attn_output.weight | Block 23 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 213 | blk.23.attn_q.weight | Block 23 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 214 | blk.23.attn_v.weight | Block 23 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 215 | blk.23.ffn_down.weight | Block 23 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 216 | blk.23.ffn_gate.weight | Block 23 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 217 | blk.23.ffn_norm.weight | Block 23 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 218 | blk.23.ffn_up.weight | Block 23 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 
4.5000 | + +- Total elements in blk.23: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.23: 4.4748 bits + + +### Block 24 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 219 | blk.24.attn_k.weight | Block 24 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 220 | blk.24.attn_norm.weight | Block 24 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 221 | blk.24.attn_output.weight | Block 24 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 222 | blk.24.attn_q.weight | Block 24 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 223 | blk.24.attn_v.weight | Block 24 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 224 | blk.24.ffn_down.weight | Block 24 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 225 | blk.24.ffn_gate.weight | Block 24 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 226 | blk.24.ffn_norm.weight | Block 24 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 227 | blk.24.ffn_up.weight | Block 24 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.24: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.24: 4.4696 bits + + +### Block 25 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 228 | blk.25.attn_k.weight | Block 25 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 229 | blk.25.attn_norm.weight | Block 25 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 230 | blk.25.attn_output.weight | Block 25 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 231 | blk.25.attn_q.weight | Block 25 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 232 | blk.25.attn_v.weight | Block 25 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 233 | blk.25.ffn_down.weight | Block 25 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 234 | blk.25.ffn_gate.weight | Block 25 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 235 | blk.25.ffn_norm.weight | Block 25 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 236 | blk.25.ffn_up.weight | Block 25 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.25: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.25: 4.4748 bits + + +### Block 26 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 237 | blk.26.attn_k.weight | Block 26 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 238 | blk.26.attn_norm.weight | Block 26 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 239 | blk.26.attn_output.weight | Block 
26 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 240 | blk.26.attn_q.weight | Block 26 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 241 | blk.26.attn_v.weight | Block 26 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 242 | blk.26.ffn_down.weight | Block 26 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 243 | blk.26.ffn_gate.weight | Block 26 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 244 | blk.26.ffn_norm.weight | Block 26 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 245 | blk.26.ffn_up.weight | Block 26 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.26: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.26: 4.4090 bits + + +### Block 27 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 246 | blk.27.attn_k.weight | Block 27 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 247 | blk.27.attn_norm.weight | Block 27 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 248 | blk.27.attn_output.weight | Block 27 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 249 | blk.27.attn_q.weight | Block 27 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 250 | blk.27.attn_v.weight | Block 27 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 251 | blk.27.ffn_down.weight | Block 27 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 
1 x 1 | IQ4_XS | 4.2500 | +| 252 | blk.27.ffn_gate.weight | Block 27 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 253 | blk.27.ffn_norm.weight | Block 27 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 254 | blk.27.ffn_up.weight | Block 27 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.27: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.27: 4.4090 bits + + +### Block 28 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 255 | blk.28.attn_k.weight | Block 28 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 256 | blk.28.attn_norm.weight | Block 28 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 257 | blk.28.attn_output.weight | Block 28 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 258 | blk.28.attn_q.weight | Block 28 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 259 | blk.28.attn_v.weight | Block 28 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 260 | blk.28.ffn_down.weight | Block 28 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 261 | blk.28.ffn_gate.weight | Block 28 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 262 | blk.28.ffn_norm.weight | Block 28 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 263 | blk.28.ffn_up.weight | Block 28 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + 
+- Total elements in blk.28: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.28: 4.4748 bits + + +### Block 29 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 264 | blk.29.attn_k.weight | Block 29 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 265 | blk.29.attn_norm.weight | Block 29 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 266 | blk.29.attn_output.weight | Block 29 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 267 | blk.29.attn_q.weight | Block 29 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 268 | blk.29.attn_v.weight | Block 29 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 269 | blk.29.ffn_down.weight | Block 29 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 270 | blk.29.ffn_gate.weight | Block 29 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 271 | blk.29.ffn_norm.weight | Block 29 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 272 | blk.29.ffn_up.weight | Block 29 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.29: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.29: 4.4748 bits + + +### Block 30 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| 
+| 273 | blk.30.attn_k.weight | Block 30 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 274 | blk.30.attn_norm.weight | Block 30 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 275 | blk.30.attn_output.weight | Block 30 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 276 | blk.30.attn_q.weight | Block 30 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 277 | blk.30.attn_v.weight | Block 30 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 278 | blk.30.ffn_down.weight | Block 30 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 279 | blk.30.ffn_gate.weight | Block 30 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 280 | blk.30.ffn_norm.weight | Block 30 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 281 | blk.30.ffn_up.weight | Block 30 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.30: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.30: 4.4090 bits + + +### Block 31 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 282 | blk.31.attn_k.weight | Block 31 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 283 | blk.31.attn_norm.weight | Block 31 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 284 | blk.31.attn_output.weight | Block 31 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 285 | blk.31.attn_q.weight | Block 31 Attention Query (W) | 
(~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 286 | blk.31.attn_v.weight | Block 31 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 287 | blk.31.ffn_down.weight | Block 31 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 288 | blk.31.ffn_gate.weight | Block 31 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 289 | blk.31.ffn_norm.weight | Block 31 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 290 | blk.31.ffn_up.weight | Block 31 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.31: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.31: 4.4748 bits + + +### Block 32 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 291 | blk.32.attn_k.weight | Block 32 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 292 | blk.32.attn_norm.weight | Block 32 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 293 | blk.32.attn_output.weight | Block 32 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 294 | blk.32.attn_q.weight | Block 32 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 295 | blk.32.attn_v.weight | Block 32 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 296 | blk.32.ffn_down.weight | Block 32 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 297 | blk.32.ffn_gate.weight | Block 32 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | 
IQ4_XS | 4.2500 | +| 298 | blk.32.ffn_norm.weight | Block 32 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 299 | blk.32.ffn_up.weight | Block 32 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.32: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.32: 4.4748 bits + + +### Block 33 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 300 | blk.33.attn_k.weight | Block 33 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 301 | blk.33.attn_norm.weight | Block 33 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 302 | blk.33.attn_output.weight | Block 33 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 303 | blk.33.attn_q.weight | Block 33 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 304 | blk.33.attn_v.weight | Block 33 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 305 | blk.33.ffn_down.weight | Block 33 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 306 | blk.33.ffn_gate.weight | Block 33 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 307 | blk.33.ffn_norm.weight | Block 33 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 308 | blk.33.ffn_up.weight | Block 33 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.33: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.33: 4.4748 bits + + +### 
Block 34 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 309 | blk.34.attn_k.weight | Block 34 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 310 | blk.34.attn_norm.weight | Block 34 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 311 | blk.34.attn_output.weight | Block 34 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 312 | blk.34.attn_q.weight | Block 34 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 313 | blk.34.attn_v.weight | Block 34 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 314 | blk.34.ffn_down.weight | Block 34 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 315 | blk.34.ffn_gate.weight | Block 34 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 316 | blk.34.ffn_norm.weight | Block 34 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 317 | blk.34.ffn_up.weight | Block 34 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.34: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.34: 4.4748 bits + + +### Block 35 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 318 | blk.35.attn_k.weight | Block 35 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 319 | 
blk.35.attn_norm.weight | Block 35 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 320 | blk.35.attn_output.weight | Block 35 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 321 | blk.35.attn_q.weight | Block 35 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 322 | blk.35.attn_v.weight | Block 35 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 323 | blk.35.ffn_down.weight | Block 35 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 324 | blk.35.ffn_gate.weight | Block 35 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 325 | blk.35.ffn_norm.weight | Block 35 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 326 | blk.35.ffn_up.weight | Block 35 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.35: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.35: 4.3827 bits + + +### Block 36 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 327 | blk.36.attn_k.weight | Block 36 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 328 | blk.36.attn_norm.weight | Block 36 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 329 | blk.36.attn_output.weight | Block 36 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 330 | blk.36.attn_q.weight | Block 36 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 331 | blk.36.attn_v.weight | Block 36 Attention Value (W) | ( 
~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 332 | blk.36.ffn_down.weight | Block 36 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 333 | blk.36.ffn_gate.weight | Block 36 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 334 | blk.36.ffn_norm.weight | Block 36 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 335 | blk.36.ffn_up.weight | Block 36 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.36: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.36: 4.3827 bits + + +### Block 37 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 336 | blk.37.attn_k.weight | Block 37 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 337 | blk.37.attn_norm.weight | Block 37 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 338 | blk.37.attn_output.weight | Block 37 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 339 | blk.37.attn_q.weight | Block 37 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 340 | blk.37.attn_v.weight | Block 37 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 341 | blk.37.ffn_down.weight | Block 37 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 342 | blk.37.ffn_gate.weight | Block 37 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 343 | blk.37.ffn_norm.weight | Block 37 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 
4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 344 | blk.37.ffn_up.weight | Block 37 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | + +- Total elements in blk.37: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.37: 4.4038 bits + + +### Block 38 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 345 | blk.38.attn_k.weight | Block 38 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 346 | blk.38.attn_norm.weight | Block 38 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 347 | blk.38.attn_output.weight | Block 38 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 348 | blk.38.attn_q.weight | Block 38 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 349 | blk.38.attn_v.weight | Block 38 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 350 | blk.38.ffn_down.weight | Block 38 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 351 | blk.38.ffn_gate.weight | Block 38 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 352 | blk.38.ffn_norm.weight | Block 38 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 353 | blk.38.ffn_up.weight | Block 38 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.38: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.38: 4.4274 bits + + +### Block 39 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | 
Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 354 | blk.39.attn_k.weight | Block 39 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 355 | blk.39.attn_norm.weight | Block 39 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 356 | blk.39.attn_output.weight | Block 39 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 357 | blk.39.attn_q.weight | Block 39 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 358 | blk.39.attn_v.weight | Block 39 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 359 | blk.39.ffn_down.weight | Block 39 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 360 | blk.39.ffn_gate.weight | Block 39 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | +| 361 | blk.39.ffn_norm.weight | Block 39 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 362 | blk.39.ffn_up.weight | Block 39 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q4_K | 4.5000 | + +- Total elements in blk.39: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.39: 4.5801 bits + + +Total BPW for granite-4.1-8b-Q4_K.gguf: 4.4999 bits diff --git a/scores/granite-4.1-8b-Q5_K.md b/scores/granite-4.1-8b-Q5_K.md new file mode 100644 index 0000000..267d7c8 --- /dev/null +++ b/scores/granite-4.1-8b-Q5_K.md @@ -0,0 +1,1247 @@ +# granite-4.1-8b-WIP/granite-4.1-8b-Q5_K.gguf - GGUF Internal File Dump + +- Endian: LITTLE endian + +## Key Value Metadata Store + +There are 42 key-value pairs in this file + +| POS | TYPE | Count | Key | Value | 
+|----:|:---------|-------:|:-----------------------------------------|:--------------------------------------------------------------------| +| 1 | UINT32 | 1 | GGUF.version | 3 | +| 2 | UINT64 | 1 | GGUF.tensor_count | 363 | +| 3 | UINT64 | 1 | GGUF.kv_count | 39 | +| 4 | STRING | 1 | general.architecture | `granite` | +| 5 | STRING | 1 | general.type | `model` | +| 6 | STRING | 1 | general.name | `Granite 4.1 8b` | +| 7 | STRING | 1 | general.basename | `granite-4.1` | +| 8 | STRING | 1 | general.size_label | `8B` | +| 9 | STRING | 1 | general.license | `apache-2.0` | +| 10 | [STRING] | 2 | general.tags | [ `language`, `granite-4.1` ] | +| 11 | UINT32 | 1 | granite.block_count | 40 | +| 12 | UINT32 | 1 | granite.context_length | 131072 | +| 13 | UINT32 | 1 | granite.embedding_length | 4096 | +| 14 | UINT32 | 1 | granite.feed_forward_length | 12800 | +| 15 | UINT32 | 1 | granite.attention.head_count | 32 | +| 16 | UINT32 | 1 | granite.attention.head_count_kv | 8 | +| 17 | FLOAT32 | 1 | granite.rope.freq_base | 10000000.0 | +| 18 | FLOAT32 | 1 | granite.attention.layer_norm_rms_epsilon | 1e-05 | +| 19 | UINT32 | 1 | granite.vocab_size | 100352 | +| 20 | UINT32 | 1 | granite.rope.dimension_count | 128 | +| 21 | FLOAT32 | 1 | granite.attention.scale | 0.0078125 | +| 22 | FLOAT32 | 1 | granite.embedding_scale | 12.0 | +| 23 | FLOAT32 | 1 | granite.residual_scale | 0.22 | +| 24 | FLOAT32 | 1 | granite.logit_scale | 16.0 | +| 25 | STRING | 1 | tokenizer.ggml.model | `gpt2` | +| 26 | STRING | 1 | tokenizer.ggml.pre | `granite-docling` | +| 27 | [STRING] | 100352 | tokenizer.ggml.tokens | [ `!`, `"`, `#`, `$`, `%`, ... ] | +| 28 | [INT32] | 100352 | tokenizer.ggml.token_type | [ 1, 1, 1, 1, 1, 1, 1, ... ] | +| 29 | [STRING] | 100000 | tokenizer.ggml.merges | [ `Ġ Ġ`, `ĠĠ ĠĠ`, `i n`, `Ġ t`, `ĠĠĠĠ ĠĠĠĠ`, ... 
] | +| 30 | UINT32 | 1 | tokenizer.ggml.bos_token_id | 100257 | +| 31 | UINT32 | 1 | tokenizer.ggml.eos_token_id | 100257 | +| 32 | UINT32 | 1 | tokenizer.ggml.unknown_token_id | 100269 | +| 33 | UINT32 | 1 | tokenizer.ggml.padding_token_id | 100256 | +| 34 | BOOL | 1 | tokenizer.ggml.add_bos_token | False | +| 35 | STRING | 1 | tokenizer.chat_template | `{%- set tools_system_message_p`...`end_of_role|>' }}{%- endif %}` | +| 36 | BOOL | 1 | tokenizer.ggml.add_space_prefix | False | +| 37 | UINT32 | 1 | general.quantization_version | 2 | +| 38 | UINT32 | 1 | general.file_type | 17 | +| 39 | STRING | 1 | quantize.imatrix.file | `imatrix/imatrix-granite-4.1-8b-medium.gguf` | +| 40 | STRING | 1 | quantize.imatrix.dataset | `../../datasets/imatrix/combined_all_medium.txt` | +| 41 | UINT32 | 1 | quantize.imatrix.entries_count | 280 | +| 42 | UINT32 | 1 | quantize.imatrix.chunks_count | 3601 | + +## Tensors Overview ~9B Elements + +Total number of elements in all tensors: 8791592960 Elements + +- [granite-4.1-8b-WIP/granite-4.1-8b-Q5\_K.gguf - GGUF Internal File Dump](#granite-41-8b-wipgranite-41-8b-q5_kgguf---gguf-internal-file-dump) + - [Key Value Metadata Store](#key-value-metadata-store) + - [Tensors Overview ~9B Elements](#tensors-overview-9b-elements) + - [Tensor Data Offset](#tensor-data-offset) + - [Base Tensor Group : ~822M Elements](#base-tensor-group--822m-elements) + - [Block 0 Tensor Group : ~199M Elements](#block-0-tensor-group--199m-elements) + - [Block 1 Tensor Group : ~199M Elements](#block-1-tensor-group--199m-elements) + - [Block 2 Tensor Group : ~199M Elements](#block-2-tensor-group--199m-elements) + - [Block 3 Tensor Group : ~199M Elements](#block-3-tensor-group--199m-elements) + - [Block 4 Tensor Group : ~199M Elements](#block-4-tensor-group--199m-elements) + - [Block 5 Tensor Group : ~199M Elements](#block-5-tensor-group--199m-elements) + - [Block 6 Tensor Group : ~199M Elements](#block-6-tensor-group--199m-elements) + - [Block 7 Tensor Group : 
~199M Elements](#block-7-tensor-group--199m-elements) + - [Block 8 Tensor Group : ~199M Elements](#block-8-tensor-group--199m-elements) + - [Block 9 Tensor Group : ~199M Elements](#block-9-tensor-group--199m-elements) + - [Block 10 Tensor Group : ~199M Elements](#block-10-tensor-group--199m-elements) + - [Block 11 Tensor Group : ~199M Elements](#block-11-tensor-group--199m-elements) + - [Block 12 Tensor Group : ~199M Elements](#block-12-tensor-group--199m-elements) + - [Block 13 Tensor Group : ~199M Elements](#block-13-tensor-group--199m-elements) + - [Block 14 Tensor Group : ~199M Elements](#block-14-tensor-group--199m-elements) + - [Block 15 Tensor Group : ~199M Elements](#block-15-tensor-group--199m-elements) + - [Block 16 Tensor Group : ~199M Elements](#block-16-tensor-group--199m-elements) + - [Block 17 Tensor Group : ~199M Elements](#block-17-tensor-group--199m-elements) + - [Block 18 Tensor Group : ~199M Elements](#block-18-tensor-group--199m-elements) + - [Block 19 Tensor Group : ~199M Elements](#block-19-tensor-group--199m-elements) + - [Block 20 Tensor Group : ~199M Elements](#block-20-tensor-group--199m-elements) + - [Block 21 Tensor Group : ~199M Elements](#block-21-tensor-group--199m-elements) + - [Block 22 Tensor Group : ~199M Elements](#block-22-tensor-group--199m-elements) + - [Block 23 Tensor Group : ~199M Elements](#block-23-tensor-group--199m-elements) + - [Block 24 Tensor Group : ~199M Elements](#block-24-tensor-group--199m-elements) + - [Block 25 Tensor Group : ~199M Elements](#block-25-tensor-group--199m-elements) + - [Block 26 Tensor Group : ~199M Elements](#block-26-tensor-group--199m-elements) + - [Block 27 Tensor Group : ~199M Elements](#block-27-tensor-group--199m-elements) + - [Block 28 Tensor Group : ~199M Elements](#block-28-tensor-group--199m-elements) + - [Block 29 Tensor Group : ~199M Elements](#block-29-tensor-group--199m-elements) + - [Block 30 Tensor Group : ~199M Elements](#block-30-tensor-group--199m-elements) + - [Block 31 
Tensor Group : ~199M Elements](#block-31-tensor-group--199m-elements) + - [Block 32 Tensor Group : ~199M Elements](#block-32-tensor-group--199m-elements) + - [Block 33 Tensor Group : ~199M Elements](#block-33-tensor-group--199m-elements) + - [Block 34 Tensor Group : ~199M Elements](#block-34-tensor-group--199m-elements) + - [Block 35 Tensor Group : ~199M Elements](#block-35-tensor-group--199m-elements) + - [Block 36 Tensor Group : ~199M Elements](#block-36-tensor-group--199m-elements) + - [Block 37 Tensor Group : ~199M Elements](#block-37-tensor-group--199m-elements) + - [Block 38 Tensor Group : ~199M Elements](#block-38-tensor-group--199m-elements) + - [Block 39 Tensor Group : ~199M Elements](#block-39-tensor-group--199m-elements) + +### Tensor Data Offset + +This table contains the offset and data segment relative to start of file + +| T_ID | Tensor Layer Name | Data Offset (B) | Data Size (B) | +|-----:|:--------------------------|-----------------:|-----------------:| +| 0 | output.weight | 0x367400 | 0x14190000 | +| 1 | output_norm.weight | 0x144f7400 | 0x4000 | +| 2 | token_embd.weight | 0x144fb400 | 0x10d80000 | +| 3 | blk.0.attn_k.weight | 0x2527b400 | 0x220000 | +| 4 | blk.0.attn_norm.weight | 0x2549b400 | 0x4000 | +| 5 | blk.0.attn_output.weight | 0x2549f400 | 0x900000 | +| 6 | blk.0.attn_q.weight | 0x25d9f400 | 0x880000 | +| 7 | blk.0.attn_v.weight | 0x2661f400 | 0x2c0000 | +| 8 | blk.0.ffn_down.weight | 0x268df400 | 0x1c20000 | +| 9 | blk.0.ffn_gate.weight | 0x284ff400 | 0x1a90000 | +| 10 | blk.0.ffn_norm.weight | 0x29f8f400 | 0x4000 | +| 11 | blk.0.ffn_up.weight | 0x29f93400 | 0x2260000 | +| 12 | blk.1.attn_k.weight | 0x2c1f3400 | 0x240000 | +| 13 | blk.1.attn_norm.weight | 0x2c433400 | 0x4000 | +| 14 | blk.1.attn_output.weight | 0x2c437400 | 0xb00000 | +| 15 | blk.1.attn_q.weight | 0x2cf37400 | 0xb00000 | +| 16 | blk.1.attn_v.weight | 0x2da37400 | 0x2c0000 | +| 17 | blk.1.ffn_down.weight | 0x2dcf7400 | 0x2260000 | +| 18 | blk.1.ffn_gate.weight | 
0x2ff57400 | 0x2260000 | +| 19 | blk.1.ffn_norm.weight | 0x321b7400 | 0x4000 | +| 20 | blk.1.ffn_up.weight | 0x321bb400 | 0x2260000 | +| 21 | blk.2.attn_k.weight | 0x3441b400 | 0x240000 | +| 22 | blk.2.attn_norm.weight | 0x3465b400 | 0x4000 | +| 23 | blk.2.attn_output.weight | 0x3465f400 | 0xb00000 | +| 24 | blk.2.attn_q.weight | 0x3515f400 | 0xb00000 | +| 25 | blk.2.attn_v.weight | 0x35c5f400 | 0x2c0000 | +| 26 | blk.2.ffn_down.weight | 0x35f1f400 | 0x2260000 | +| 27 | blk.2.ffn_gate.weight | 0x3817f400 | 0x2260000 | +| 28 | blk.2.ffn_norm.weight | 0x3a3df400 | 0x4000 | +| 29 | blk.2.ffn_up.weight | 0x3a3e3400 | 0x2260000 | +| 30 | blk.3.attn_k.weight | 0x3c643400 | 0x240000 | +| 31 | blk.3.attn_norm.weight | 0x3c883400 | 0x4000 | +| 32 | blk.3.attn_output.weight | 0x3c887400 | 0xb00000 | +| 33 | blk.3.attn_q.weight | 0x3d387400 | 0x900000 | +| 34 | blk.3.attn_v.weight | 0x3dc87400 | 0x2c0000 | +| 35 | blk.3.ffn_down.weight | 0x3df47400 | 0x1c20000 | +| 36 | blk.3.ffn_gate.weight | 0x3fb67400 | 0x2260000 | +| 37 | blk.3.ffn_norm.weight | 0x41dc7400 | 0x4000 | +| 38 | blk.3.ffn_up.weight | 0x41dcb400 | 0x2260000 | +| 39 | blk.4.attn_k.weight | 0x4402b400 | 0x2c0000 | +| 40 | blk.4.attn_norm.weight | 0x442eb400 | 0x4000 | +| 41 | blk.4.attn_output.weight | 0x442ef400 | 0xb00000 | +| 42 | blk.4.attn_q.weight | 0x44def400 | 0xb00000 | +| 43 | blk.4.attn_v.weight | 0x458ef400 | 0x2c0000 | +| 44 | blk.4.ffn_down.weight | 0x45baf400 | 0x2260000 | +| 45 | blk.4.ffn_gate.weight | 0x47e0f400 | 0x2260000 | +| 46 | blk.4.ffn_norm.weight | 0x4a06f400 | 0x4000 | +| 47 | blk.4.ffn_up.weight | 0x4a073400 | 0x2260000 | +| 48 | blk.5.attn_k.weight | 0x4c2d3400 | 0x240000 | +| 49 | blk.5.attn_norm.weight | 0x4c513400 | 0x4000 | +| 50 | blk.5.attn_output.weight | 0x4c517400 | 0xb00000 | +| 51 | blk.5.attn_q.weight | 0x4d017400 | 0xb00000 | +| 52 | blk.5.attn_v.weight | 0x4db17400 | 0x2c0000 | +| 53 | blk.5.ffn_down.weight | 0x4ddd7400 | 0x1c20000 | +| 54 | blk.5.ffn_gate.weight | 
0x4f9f7400 | 0x2260000 | +| 55 | blk.5.ffn_norm.weight | 0x51c57400 | 0x4000 | +| 56 | blk.5.ffn_up.weight | 0x51c5b400 | 0x2260000 | +| 57 | blk.6.attn_k.weight | 0x53ebb400 | 0x240000 | +| 58 | blk.6.attn_norm.weight | 0x540fb400 | 0x4000 | +| 59 | blk.6.attn_output.weight | 0x540ff400 | 0xb00000 | +| 60 | blk.6.attn_q.weight | 0x54bff400 | 0xb00000 | +| 61 | blk.6.attn_v.weight | 0x556ff400 | 0x300000 | +| 62 | blk.6.ffn_down.weight | 0x559ff400 | 0x2260000 | +| 63 | blk.6.ffn_gate.weight | 0x57c5f400 | 0x2260000 | +| 64 | blk.6.ffn_norm.weight | 0x59ebf400 | 0x4000 | +| 65 | blk.6.ffn_up.weight | 0x59ec3400 | 0x2260000 | +| 66 | blk.7.attn_k.weight | 0x5c123400 | 0x2c0000 | +| 67 | blk.7.attn_norm.weight | 0x5c3e3400 | 0x4000 | +| 68 | blk.7.attn_output.weight | 0x5c3e7400 | 0xb00000 | +| 69 | blk.7.attn_q.weight | 0x5cee7400 | 0xb00000 | +| 70 | blk.7.attn_v.weight | 0x5d9e7400 | 0x2c0000 | +| 71 | blk.7.ffn_down.weight | 0x5dca7400 | 0x2260000 | +| 72 | blk.7.ffn_gate.weight | 0x5ff07400 | 0x2260000 | +| 73 | blk.7.ffn_norm.weight | 0x62167400 | 0x4000 | +| 74 | blk.7.ffn_up.weight | 0x6216b400 | 0x2260000 | +| 75 | blk.8.attn_k.weight | 0x643cb400 | 0x240000 | +| 76 | blk.8.attn_norm.weight | 0x6460b400 | 0x4000 | +| 77 | blk.8.attn_output.weight | 0x6460f400 | 0xb00000 | +| 78 | blk.8.attn_q.weight | 0x6510f400 | 0xb00000 | +| 79 | blk.8.attn_v.weight | 0x65c0f400 | 0x2c0000 | +| 80 | blk.8.ffn_down.weight | 0x65ecf400 | 0x2260000 | +| 81 | blk.8.ffn_gate.weight | 0x6812f400 | 0x2260000 | +| 82 | blk.8.ffn_norm.weight | 0x6a38f400 | 0x4000 | +| 83 | blk.8.ffn_up.weight | 0x6a393400 | 0x2260000 | +| 84 | blk.9.attn_k.weight | 0x6c5f3400 | 0x240000 | +| 85 | blk.9.attn_norm.weight | 0x6c833400 | 0x4000 | +| 86 | blk.9.attn_output.weight | 0x6c837400 | 0xb00000 | +| 87 | blk.9.attn_q.weight | 0x6d337400 | 0xb00000 | +| 88 | blk.9.attn_v.weight | 0x6de37400 | 0x2c0000 | +| 89 | blk.9.ffn_down.weight | 0x6e0f7400 | 0x2260000 | +| 90 | blk.9.ffn_gate.weight | 
0x70357400 | 0x2260000 | +| 91 | blk.9.ffn_norm.weight | 0x725b7400 | 0x4000 | +| 92 | blk.9.ffn_up.weight | 0x725bb400 | 0x2260000 | +| 93 | blk.10.attn_k.weight | 0x7481b400 | 0x240000 | +| 94 | blk.10.attn_norm.weight | 0x74a5b400 | 0x4000 | +| 95 | blk.10.attn_output.weight | 0x74a5f400 | 0xb00000 | +| 96 | blk.10.attn_q.weight | 0x7555f400 | 0xb00000 | +| 97 | blk.10.attn_v.weight | 0x7605f400 | 0x2c0000 | +| 98 | blk.10.ffn_down.weight | 0x7631f400 | 0x2260000 | +| 99 | blk.10.ffn_gate.weight | 0x7857f400 | 0x2260000 | +| 100 | blk.10.ffn_norm.weight | 0x7a7df400 | 0x4000 | +| 101 | blk.10.ffn_up.weight | 0x7a7e3400 | 0x2260000 | +| 102 | blk.11.attn_k.weight | 0x7ca43400 | 0x240000 | +| 103 | blk.11.attn_norm.weight | 0x7cc83400 | 0x4000 | +| 104 | blk.11.attn_output.weight | 0x7cc87400 | 0xb00000 | +| 105 | blk.11.attn_q.weight | 0x7d787400 | 0xb00000 | +| 106 | blk.11.attn_v.weight | 0x7e287400 | 0x2c0000 | +| 107 | blk.11.ffn_down.weight | 0x7e547400 | 0x2260000 | +| 108 | blk.11.ffn_gate.weight | 0x807a7400 | 0x1a90000 | +| 109 | blk.11.ffn_norm.weight | 0x82237400 | 0x4000 | +| 110 | blk.11.ffn_up.weight | 0x8223b400 | 0x2260000 | +| 111 | blk.12.attn_k.weight | 0x8449b400 | 0x240000 | +| 112 | blk.12.attn_norm.weight | 0x846db400 | 0x4000 | +| 113 | blk.12.attn_output.weight | 0x846df400 | 0xb00000 | +| 114 | blk.12.attn_q.weight | 0x851df400 | 0xb00000 | +| 115 | blk.12.attn_v.weight | 0x85cdf400 | 0x2c0000 | +| 116 | blk.12.ffn_down.weight | 0x85f9f400 | 0x2260000 | +| 117 | blk.12.ffn_gate.weight | 0x881ff400 | 0x1a90000 | +| 118 | blk.12.ffn_norm.weight | 0x89c8f400 | 0x4000 | +| 119 | blk.12.ffn_up.weight | 0x89c93400 | 0x2260000 | +| 120 | blk.13.attn_k.weight | 0x8bef3400 | 0x2c0000 | +| 121 | blk.13.attn_norm.weight | 0x8c1b3400 | 0x4000 | +| 122 | blk.13.attn_output.weight | 0x8c1b7400 | 0xb00000 | +| 123 | blk.13.attn_q.weight | 0x8ccb7400 | 0xb00000 | +| 124 | blk.13.attn_v.weight | 0x8d7b7400 | 0x2c0000 | +| 125 | blk.13.ffn_down.weight | 
0x8da77400 | 0x2260000 | +| 126 | blk.13.ffn_gate.weight | 0x8fcd7400 | 0x2260000 | +| 127 | blk.13.ffn_norm.weight | 0x91f37400 | 0x4000 | +| 128 | blk.13.ffn_up.weight | 0x91f3b400 | 0x2260000 | +| 129 | blk.14.attn_k.weight | 0x9419b400 | 0x2c0000 | +| 130 | blk.14.attn_norm.weight | 0x9445b400 | 0x4000 | +| 131 | blk.14.attn_output.weight | 0x9445f400 | 0xb00000 | +| 132 | blk.14.attn_q.weight | 0x94f5f400 | 0xb00000 | +| 133 | blk.14.attn_v.weight | 0x95a5f400 | 0x2c0000 | +| 134 | blk.14.ffn_down.weight | 0x95d1f400 | 0x2260000 | +| 135 | blk.14.ffn_gate.weight | 0x97f7f400 | 0x2260000 | +| 136 | blk.14.ffn_norm.weight | 0x9a1df400 | 0x4000 | +| 137 | blk.14.ffn_up.weight | 0x9a1e3400 | 0x2260000 | +| 138 | blk.15.attn_k.weight | 0x9c443400 | 0x2c0000 | +| 139 | blk.15.attn_norm.weight | 0x9c703400 | 0x4000 | +| 140 | blk.15.attn_output.weight | 0x9c707400 | 0xb00000 | +| 141 | blk.15.attn_q.weight | 0x9d207400 | 0xb00000 | +| 142 | blk.15.attn_v.weight | 0x9dd07400 | 0x2c0000 | +| 143 | blk.15.ffn_down.weight | 0x9dfc7400 | 0x2260000 | +| 144 | blk.15.ffn_gate.weight | 0xa0227400 | 0x2260000 | +| 145 | blk.15.ffn_norm.weight | 0xa2487400 | 0x4000 | +| 146 | blk.15.ffn_up.weight | 0xa248b400 | 0x2260000 | +| 147 | blk.16.attn_k.weight | 0xa46eb400 | 0x2c0000 | +| 148 | blk.16.attn_norm.weight | 0xa49ab400 | 0x4000 | +| 149 | blk.16.attn_output.weight | 0xa49af400 | 0xb00000 | +| 150 | blk.16.attn_q.weight | 0xa54af400 | 0xb00000 | +| 151 | blk.16.attn_v.weight | 0xa5faf400 | 0x2c0000 | +| 152 | blk.16.ffn_down.weight | 0xa626f400 | 0x2260000 | +| 153 | blk.16.ffn_gate.weight | 0xa84cf400 | 0x2260000 | +| 154 | blk.16.ffn_norm.weight | 0xaa72f400 | 0x4000 | +| 155 | blk.16.ffn_up.weight | 0xaa733400 | 0x2260000 | +| 156 | blk.17.attn_k.weight | 0xac993400 | 0x2c0000 | +| 157 | blk.17.attn_norm.weight | 0xacc53400 | 0x4000 | +| 158 | blk.17.attn_output.weight | 0xacc57400 | 0xb00000 | +| 159 | blk.17.attn_q.weight | 0xad757400 | 0xb00000 | +| 160 | 
blk.17.attn_v.weight | 0xae257400 | 0x2c0000 | +| 161 | blk.17.ffn_down.weight | 0xae517400 | 0x2260000 | +| 162 | blk.17.ffn_gate.weight | 0xb0777400 | 0x2260000 | +| 163 | blk.17.ffn_norm.weight | 0xb29d7400 | 0x4000 | +| 164 | blk.17.ffn_up.weight | 0xb29db400 | 0x2260000 | +| 165 | blk.18.attn_k.weight | 0xb4c3b400 | 0x240000 | +| 166 | blk.18.attn_norm.weight | 0xb4e7b400 | 0x4000 | +| 167 | blk.18.attn_output.weight | 0xb4e7f400 | 0xb00000 | +| 168 | blk.18.attn_q.weight | 0xb597f400 | 0xb00000 | +| 169 | blk.18.attn_v.weight | 0xb647f400 | 0x2c0000 | +| 170 | blk.18.ffn_down.weight | 0xb673f400 | 0x2260000 | +| 171 | blk.18.ffn_gate.weight | 0xb899f400 | 0x2260000 | +| 172 | blk.18.ffn_norm.weight | 0xbabff400 | 0x4000 | +| 173 | blk.18.ffn_up.weight | 0xbac03400 | 0x2260000 | +| 174 | blk.19.attn_k.weight | 0xbce63400 | 0x240000 | +| 175 | blk.19.attn_norm.weight | 0xbd0a3400 | 0x4000 | +| 176 | blk.19.attn_output.weight | 0xbd0a7400 | 0xb00000 | +| 177 | blk.19.attn_q.weight | 0xbdba7400 | 0xb00000 | +| 178 | blk.19.attn_v.weight | 0xbe6a7400 | 0x2c0000 | +| 179 | blk.19.ffn_down.weight | 0xbe967400 | 0x2260000 | +| 180 | blk.19.ffn_gate.weight | 0xc0bc7400 | 0x2260000 | +| 181 | blk.19.ffn_norm.weight | 0xc2e27400 | 0x4000 | +| 182 | blk.19.ffn_up.weight | 0xc2e2b400 | 0x2260000 | +| 183 | blk.20.attn_k.weight | 0xc508b400 | 0x2c0000 | +| 184 | blk.20.attn_norm.weight | 0xc534b400 | 0x4000 | +| 185 | blk.20.attn_output.weight | 0xc534f400 | 0xb00000 | +| 186 | blk.20.attn_q.weight | 0xc5e4f400 | 0xb00000 | +| 187 | blk.20.attn_v.weight | 0xc694f400 | 0x2c0000 | +| 188 | blk.20.ffn_down.weight | 0xc6c0f400 | 0x2260000 | +| 189 | blk.20.ffn_gate.weight | 0xc8e6f400 | 0x2260000 | +| 190 | blk.20.ffn_norm.weight | 0xcb0cf400 | 0x4000 | +| 191 | blk.20.ffn_up.weight | 0xcb0d3400 | 0x2260000 | +| 192 | blk.21.attn_k.weight | 0xcd333400 | 0x240000 | +| 193 | blk.21.attn_norm.weight | 0xcd573400 | 0x4000 | +| 194 | blk.21.attn_output.weight | 0xcd577400 | 
0xb00000 | +| 195 | blk.21.attn_q.weight | 0xce077400 | 0xb00000 | +| 196 | blk.21.attn_v.weight | 0xceb77400 | 0x2c0000 | +| 197 | blk.21.ffn_down.weight | 0xcee37400 | 0x2260000 | +| 198 | blk.21.ffn_gate.weight | 0xd1097400 | 0x2260000 | +| 199 | blk.21.ffn_norm.weight | 0xd32f7400 | 0x4000 | +| 200 | blk.21.ffn_up.weight | 0xd32fb400 | 0x2260000 | +| 201 | blk.22.attn_k.weight | 0xd555b400 | 0x2c0000 | +| 202 | blk.22.attn_norm.weight | 0xd581b400 | 0x4000 | +| 203 | blk.22.attn_output.weight | 0xd581f400 | 0xb00000 | +| 204 | blk.22.attn_q.weight | 0xd631f400 | 0xb00000 | +| 205 | blk.22.attn_v.weight | 0xd6e1f400 | 0x2c0000 | +| 206 | blk.22.ffn_down.weight | 0xd70df400 | 0x2260000 | +| 207 | blk.22.ffn_gate.weight | 0xd933f400 | 0x2260000 | +| 208 | blk.22.ffn_norm.weight | 0xdb59f400 | 0x4000 | +| 209 | blk.22.ffn_up.weight | 0xdb5a3400 | 0x2260000 | +| 210 | blk.23.attn_k.weight | 0xdd803400 | 0x240000 | +| 211 | blk.23.attn_norm.weight | 0xdda43400 | 0x4000 | +| 212 | blk.23.attn_output.weight | 0xdda47400 | 0xb00000 | +| 213 | blk.23.attn_q.weight | 0xde547400 | 0xb00000 | +| 214 | blk.23.attn_v.weight | 0xdf047400 | 0x2c0000 | +| 215 | blk.23.ffn_down.weight | 0xdf307400 | 0x2260000 | +| 216 | blk.23.ffn_gate.weight | 0xe1567400 | 0x2260000 | +| 217 | blk.23.ffn_norm.weight | 0xe37c7400 | 0x4000 | +| 218 | blk.23.ffn_up.weight | 0xe37cb400 | 0x2260000 | +| 219 | blk.24.attn_k.weight | 0xe5a2b400 | 0x240000 | +| 220 | blk.24.attn_norm.weight | 0xe5c6b400 | 0x4000 | +| 221 | blk.24.attn_output.weight | 0xe5c6f400 | 0xd20000 | +| 222 | blk.24.attn_q.weight | 0xe698f400 | 0xb00000 | +| 223 | blk.24.attn_v.weight | 0xe748f400 | 0x2c0000 | +| 224 | blk.24.ffn_down.weight | 0xe774f400 | 0x2260000 | +| 225 | blk.24.ffn_gate.weight | 0xe99af400 | 0x2260000 | +| 226 | blk.24.ffn_norm.weight | 0xebc0f400 | 0x4000 | +| 227 | blk.24.ffn_up.weight | 0xebc13400 | 0x2260000 | +| 228 | blk.25.attn_k.weight | 0xede73400 | 0x240000 | +| 229 | blk.25.attn_norm.weight | 
0xee0b3400 | 0x4000 | +| 230 | blk.25.attn_output.weight | 0xee0b7400 | 0xb00000 | +| 231 | blk.25.attn_q.weight | 0xeebb7400 | 0xb00000 | +| 232 | blk.25.attn_v.weight | 0xef6b7400 | 0x2c0000 | +| 233 | blk.25.ffn_down.weight | 0xef977400 | 0x2260000 | +| 234 | blk.25.ffn_gate.weight | 0xf1bd7400 | 0x2260000 | +| 235 | blk.25.ffn_norm.weight | 0xf3e37400 | 0x4000 | +| 236 | blk.25.ffn_up.weight | 0xf3e3b400 | 0x2260000 | +| 237 | blk.26.attn_k.weight | 0xf609b400 | 0x240000 | +| 238 | blk.26.attn_norm.weight | 0xf62db400 | 0x4000 | +| 239 | blk.26.attn_output.weight | 0xf62df400 | 0xb00000 | +| 240 | blk.26.attn_q.weight | 0xf6ddf400 | 0xb00000 | +| 241 | blk.26.attn_v.weight | 0xf78df400 | 0x2c0000 | +| 242 | blk.26.ffn_down.weight | 0xf7b9f400 | 0x2260000 | +| 243 | blk.26.ffn_gate.weight | 0xf9dff400 | 0x2260000 | +| 244 | blk.26.ffn_norm.weight | 0xfc05f400 | 0x4000 | +| 245 | blk.26.ffn_up.weight | 0xfc063400 | 0x2260000 | +| 246 | blk.27.attn_k.weight | 0xfe2c3400 | 0x2c0000 | +| 247 | blk.27.attn_norm.weight | 0xfe583400 | 0x4000 | +| 248 | blk.27.attn_output.weight | 0xfe587400 | 0xb00000 | +| 249 | blk.27.attn_q.weight | 0xff087400 | 0xb00000 | +| 250 | blk.27.attn_v.weight | 0xffb87400 | 0x2c0000 | +| 251 | blk.27.ffn_down.weight | 0xffe47400 | 0x2260000 | +| 252 | blk.27.ffn_gate.weight | 0x1020a7400 | 0x2260000 | +| 253 | blk.27.ffn_norm.weight | 0x104307400 | 0x4000 | +| 254 | blk.27.ffn_up.weight | 0x10430b400 | 0x2260000 | +| 255 | blk.28.attn_k.weight | 0x10656b400 | 0x2c0000 | +| 256 | blk.28.attn_norm.weight | 0x10682b400 | 0x4000 | +| 257 | blk.28.attn_output.weight | 0x10682f400 | 0xb00000 | +| 258 | blk.28.attn_q.weight | 0x10732f400 | 0xb00000 | +| 259 | blk.28.attn_v.weight | 0x107e2f400 | 0x2c0000 | +| 260 | blk.28.ffn_down.weight | 0x1080ef400 | 0x2260000 | +| 261 | blk.28.ffn_gate.weight | 0x10a34f400 | 0x2260000 | +| 262 | blk.28.ffn_norm.weight | 0x10c5af400 | 0x4000 | +| 263 | blk.28.ffn_up.weight | 0x10c5b3400 | 0x2260000 | +| 264 | 
blk.29.attn_k.weight | 0x10e813400 | 0x2c0000 | +| 265 | blk.29.attn_norm.weight | 0x10ead3400 | 0x4000 | +| 266 | blk.29.attn_output.weight | 0x10ead7400 | 0xb00000 | +| 267 | blk.29.attn_q.weight | 0x10f5d7400 | 0xb00000 | +| 268 | blk.29.attn_v.weight | 0x1100d7400 | 0x2c0000 | +| 269 | blk.29.ffn_down.weight | 0x110397400 | 0x2260000 | +| 270 | blk.29.ffn_gate.weight | 0x1125f7400 | 0x2260000 | +| 271 | blk.29.ffn_norm.weight | 0x114857400 | 0x4000 | +| 272 | blk.29.ffn_up.weight | 0x11485b400 | 0x2260000 | +| 273 | blk.30.attn_k.weight | 0x116abb400 | 0x2c0000 | +| 274 | blk.30.attn_norm.weight | 0x116d7b400 | 0x4000 | +| 275 | blk.30.attn_output.weight | 0x116d7f400 | 0xb00000 | +| 276 | blk.30.attn_q.weight | 0x11787f400 | 0xb00000 | +| 277 | blk.30.attn_v.weight | 0x11837f400 | 0x2c0000 | +| 278 | blk.30.ffn_down.weight | 0x11863f400 | 0x2260000 | +| 279 | blk.30.ffn_gate.weight | 0x11a89f400 | 0x2260000 | +| 280 | blk.30.ffn_norm.weight | 0x11caff400 | 0x4000 | +| 281 | blk.30.ffn_up.weight | 0x11cb03400 | 0x2260000 | +| 282 | blk.31.attn_k.weight | 0x11ed63400 | 0x2c0000 | +| 283 | blk.31.attn_norm.weight | 0x11f023400 | 0x4000 | +| 284 | blk.31.attn_output.weight | 0x11f027400 | 0xd20000 | +| 285 | blk.31.attn_q.weight | 0x11fd47400 | 0xb00000 | +| 286 | blk.31.attn_v.weight | 0x120847400 | 0x2c0000 | +| 287 | blk.31.ffn_down.weight | 0x120b07400 | 0x2260000 | +| 288 | blk.31.ffn_gate.weight | 0x122d67400 | 0x2260000 | +| 289 | blk.31.ffn_norm.weight | 0x124fc7400 | 0x4000 | +| 290 | blk.31.ffn_up.weight | 0x124fcb400 | 0x2260000 | +| 291 | blk.32.attn_k.weight | 0x12722b400 | 0x2c0000 | +| 292 | blk.32.attn_norm.weight | 0x1274eb400 | 0x4000 | +| 293 | blk.32.attn_output.weight | 0x1274ef400 | 0xb00000 | +| 294 | blk.32.attn_q.weight | 0x127fef400 | 0xb00000 | +| 295 | blk.32.attn_v.weight | 0x128aef400 | 0x2c0000 | +| 296 | blk.32.ffn_down.weight | 0x128daf400 | 0x2260000 | +| 297 | blk.32.ffn_gate.weight | 0x12b00f400 | 0x2260000 | +| 298 | 
blk.32.ffn_norm.weight | 0x12d26f400 | 0x4000 | +| 299 | blk.32.ffn_up.weight | 0x12d273400 | 0x2260000 | +| 300 | blk.33.attn_k.weight | 0x12f4d3400 | 0x2c0000 | +| 301 | blk.33.attn_norm.weight | 0x12f793400 | 0x4000 | +| 302 | blk.33.attn_output.weight | 0x12f797400 | 0xd20000 | +| 303 | blk.33.attn_q.weight | 0x1304b7400 | 0xb00000 | +| 304 | blk.33.attn_v.weight | 0x130fb7400 | 0x2c0000 | +| 305 | blk.33.ffn_down.weight | 0x131277400 | 0x2260000 | +| 306 | blk.33.ffn_gate.weight | 0x1334d7400 | 0x2260000 | +| 307 | blk.33.ffn_norm.weight | 0x135737400 | 0x4000 | +| 308 | blk.33.ffn_up.weight | 0x13573b400 | 0x2260000 | +| 309 | blk.34.attn_k.weight | 0x13799b400 | 0x2c0000 | +| 310 | blk.34.attn_norm.weight | 0x137c5b400 | 0x4000 | +| 311 | blk.34.attn_output.weight | 0x137c5f400 | 0xb00000 | +| 312 | blk.34.attn_q.weight | 0x13875f400 | 0xb00000 | +| 313 | blk.34.attn_v.weight | 0x13925f400 | 0x2c0000 | +| 314 | blk.34.ffn_down.weight | 0x13951f400 | 0x2260000 | +| 315 | blk.34.ffn_gate.weight | 0x13b77f400 | 0x2260000 | +| 316 | blk.34.ffn_norm.weight | 0x13d9df400 | 0x4000 | +| 317 | blk.34.ffn_up.weight | 0x13d9e3400 | 0x2260000 | +| 318 | blk.35.attn_k.weight | 0x13fc43400 | 0x240000 | +| 319 | blk.35.attn_norm.weight | 0x13fe83400 | 0x4000 | +| 320 | blk.35.attn_output.weight | 0x13fe87400 | 0xb00000 | +| 321 | blk.35.attn_q.weight | 0x140987400 | 0xb00000 | +| 322 | blk.35.attn_v.weight | 0x141487400 | 0x2c0000 | +| 323 | blk.35.ffn_down.weight | 0x141747400 | 0x2260000 | +| 324 | blk.35.ffn_gate.weight | 0x1439a7400 | 0x2260000 | +| 325 | blk.35.ffn_norm.weight | 0x145c07400 | 0x4000 | +| 326 | blk.35.ffn_up.weight | 0x145c0b400 | 0x2260000 | +| 327 | blk.36.attn_k.weight | 0x147e6b400 | 0x240000 | +| 328 | blk.36.attn_norm.weight | 0x1480ab400 | 0x4000 | +| 329 | blk.36.attn_output.weight | 0x1480af400 | 0xb00000 | +| 330 | blk.36.attn_q.weight | 0x148baf400 | 0xb00000 | +| 331 | blk.36.attn_v.weight | 0x1496af400 | 0x2c0000 | +| 332 | 
blk.36.ffn_down.weight | 0x14996f400 | 0x2260000 | +| 333 | blk.36.ffn_gate.weight | 0x14bbcf400 | 0x2260000 | +| 334 | blk.36.ffn_norm.weight | 0x14de2f400 | 0x4000 | +| 335 | blk.36.ffn_up.weight | 0x14de33400 | 0x2260000 | +| 336 | blk.37.attn_k.weight | 0x150093400 | 0x240000 | +| 337 | blk.37.attn_norm.weight | 0x1502d3400 | 0x4000 | +| 338 | blk.37.attn_output.weight | 0x1502d7400 | 0xb00000 | +| 339 | blk.37.attn_q.weight | 0x150dd7400 | 0xb00000 | +| 340 | blk.37.attn_v.weight | 0x1518d7400 | 0x2c0000 | +| 341 | blk.37.ffn_down.weight | 0x151b97400 | 0x2260000 | +| 342 | blk.37.ffn_gate.weight | 0x153df7400 | 0x2260000 | +| 343 | blk.37.ffn_norm.weight | 0x156057400 | 0x4000 | +| 344 | blk.37.ffn_up.weight | 0x15605b400 | 0x2260000 | +| 345 | blk.38.attn_k.weight | 0x1582bb400 | 0x240000 | +| 346 | blk.38.attn_norm.weight | 0x1584fb400 | 0x4000 | +| 347 | blk.38.attn_output.weight | 0x1584ff400 | 0xb00000 | +| 348 | blk.38.attn_q.weight | 0x158fff400 | 0xb00000 | +| 349 | blk.38.attn_v.weight | 0x159aff400 | 0x2c0000 | +| 350 | blk.38.ffn_down.weight | 0x159dbf400 | 0x2260000 | +| 351 | blk.38.ffn_gate.weight | 0x15c01f400 | 0x2260000 | +| 352 | blk.38.ffn_norm.weight | 0x15e27f400 | 0x4000 | +| 353 | blk.38.ffn_up.weight | 0x15e283400 | 0x2260000 | +| 354 | blk.39.attn_k.weight | 0x1604e3400 | 0x2c0000 | +| 355 | blk.39.attn_norm.weight | 0x1607a3400 | 0x4000 | +| 356 | blk.39.attn_output.weight | 0x1607a7400 | 0xb00000 | +| 357 | blk.39.attn_q.weight | 0x1612a7400 | 0xb00000 | +| 358 | blk.39.attn_v.weight | 0x161da7400 | 0x2c0000 | +| 359 | blk.39.ffn_down.weight | 0x162067400 | 0x2260000 | +| 360 | blk.39.ffn_gate.weight | 0x1642c7400 | 0x2260000 | +| 361 | blk.39.ffn_norm.weight | 0x166527400 | 0x4000 | +| 362 | blk.39.ffn_up.weight | 0x16652b400 | 0x2260000 | + +### Base Tensor Group : ~822M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------|:---------------------------------|:------------------|:----------------------|:-----|--------:| +| 0 | output.weight | Output (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q6_K | 6.5625 | +| 1 | output_norm.weight | Output Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 2 | token_embd.weight | Token Embedding (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in base: (~822M) 822087680 +- Percentage of total elements: 9.35% +- Bits per Weight (BPW) for base: 6.0314 bits + + +### Block 0 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 3 | blk.0.attn_k.weight | Block 0 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | IQ4_XS | 4.2500 | +| 4 | blk.0.attn_norm.weight | Block 0 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 5 | blk.0.attn_output.weight | Block 0 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 6 | blk.0.attn_q.weight | Block 0 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | IQ4_XS | 4.2500 | +| 7 | blk.0.attn_v.weight | Block 0 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 8 | blk.0.ffn_down.weight | Block 0 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 9 | blk.0.ffn_gate.weight | Block 0 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 10 | blk.0.ffn_norm.weight | Block 0 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 11 | blk.0.ffn_up.weight | Block 0 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.0: 
(~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.0: 4.6932 bits + + +### Block 1 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 12 | blk.1.attn_k.weight | Block 1 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 13 | blk.1.attn_norm.weight | Block 1 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 14 | blk.1.attn_output.weight | Block 1 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 15 | blk.1.attn_q.weight | Block 1 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 16 | blk.1.attn_v.weight | Block 1 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 17 | blk.1.ffn_down.weight | Block 1 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 18 | blk.1.ffn_gate.weight | Block 1 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 19 | blk.1.ffn_norm.weight | Block 1 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 20 | blk.1.ffn_up.weight | Block 1 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.1: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.1: 5.4800 bits + + +### Block 2 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 21 | blk.2.attn_k.weight | Block 2 Attention Key (W) | ( ~4M) 4194304 
| 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 22 | blk.2.attn_norm.weight | Block 2 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 23 | blk.2.attn_output.weight | Block 2 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 24 | blk.2.attn_q.weight | Block 2 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 25 | blk.2.attn_v.weight | Block 2 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 26 | blk.2.ffn_down.weight | Block 2 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 27 | blk.2.ffn_gate.weight | Block 2 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 28 | blk.2.ffn_norm.weight | Block 2 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 29 | blk.2.ffn_up.weight | Block 2 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.2: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.2: 5.4800 bits + + +### Block 3 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 30 | blk.3.attn_k.weight | Block 3 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 31 | blk.3.attn_norm.weight | Block 3 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 32 | blk.3.attn_output.weight | Block 3 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 33 | blk.3.attn_q.weight | Block 3 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 34 | blk.3.attn_v.weight | Block 3 Attention Value (W) | ( ~4M) 
4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 35 | blk.3.ffn_down.weight | Block 3 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 36 | blk.3.ffn_gate.weight | Block 3 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 37 | blk.3.ffn_norm.weight | Block 3 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 38 | blk.3.ffn_up.weight | Block 3 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.3: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.3: 5.1327 bits + + +### Block 4 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 39 | blk.4.attn_k.weight | Block 4 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 40 | blk.4.attn_norm.weight | Block 4 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 41 | blk.4.attn_output.weight | Block 4 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 42 | blk.4.attn_q.weight | Block 4 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 43 | blk.4.attn_v.weight | Block 4 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 44 | blk.4.ffn_down.weight | Block 4 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 45 | blk.4.ffn_gate.weight | Block 4 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 46 | blk.4.ffn_norm.weight | Block 4 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 47 | blk.4.ffn_up.weight 
| Block 4 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.4: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.4: 5.5011 bits + + +### Block 5 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 48 | blk.5.attn_k.weight | Block 5 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 49 | blk.5.attn_norm.weight | Block 5 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 50 | blk.5.attn_output.weight | Block 5 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 51 | blk.5.attn_q.weight | Block 5 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 52 | blk.5.attn_v.weight | Block 5 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 53 | blk.5.ffn_down.weight | Block 5 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 54 | blk.5.ffn_gate.weight | Block 5 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 55 | blk.5.ffn_norm.weight | Block 5 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 56 | blk.5.ffn_up.weight | Block 5 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.5: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.5: 5.2169 bits + + +### Block 6 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 57 | blk.6.attn_k.weight | Block 6 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 58 | blk.6.attn_norm.weight | Block 6 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 59 | blk.6.attn_output.weight | Block 6 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 60 | blk.6.attn_q.weight | Block 6 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 61 | blk.6.attn_v.weight | Block 6 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_1 | 6.0000 | +| 62 | blk.6.ffn_down.weight | Block 6 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 63 | blk.6.ffn_gate.weight | Block 6 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 64 | blk.6.ffn_norm.weight | Block 6 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 65 | blk.6.ffn_up.weight | Block 6 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.6: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.6: 5.4906 bits + + +### Block 7 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 66 | blk.7.attn_k.weight | Block 7 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 67 | blk.7.attn_norm.weight | Block 7 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 68 | blk.7.attn_output.weight | Block 7 Attention Output (W) | (~17M) 16777216 | 4096 x 
4096 x 1 x 1 | Q5_K | 5.5000 | +| 69 | blk.7.attn_q.weight | Block 7 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 70 | blk.7.attn_v.weight | Block 7 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 71 | blk.7.ffn_down.weight | Block 7 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 72 | blk.7.ffn_gate.weight | Block 7 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 73 | blk.7.ffn_norm.weight | Block 7 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 74 | blk.7.ffn_up.weight | Block 7 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.7: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.7: 5.5011 bits + + +### Block 8 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 75 | blk.8.attn_k.weight | Block 8 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 76 | blk.8.attn_norm.weight | Block 8 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 77 | blk.8.attn_output.weight | Block 8 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 78 | blk.8.attn_q.weight | Block 8 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 79 | blk.8.attn_v.weight | Block 8 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 80 | blk.8.ffn_down.weight | Block 8 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 81 | blk.8.ffn_gate.weight | Block 8 Feed-Forward Network "Gate" (W) | 
(~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 82 | blk.8.ffn_norm.weight | Block 8 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 83 | blk.8.ffn_up.weight | Block 8 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.8: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.8: 5.4800 bits + + +### Block 9 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 84 | blk.9.attn_k.weight | Block 9 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 85 | blk.9.attn_norm.weight | Block 9 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 86 | blk.9.attn_output.weight | Block 9 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 87 | blk.9.attn_q.weight | Block 9 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 88 | blk.9.attn_v.weight | Block 9 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 89 | blk.9.ffn_down.weight | Block 9 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 90 | blk.9.ffn_gate.weight | Block 9 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 91 | blk.9.ffn_norm.weight | Block 9 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 92 | blk.9.ffn_up.weight | Block 9 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.9: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.9: 5.4800 bits + + +### Block 10 
Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 93 | blk.10.attn_k.weight | Block 10 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 94 | blk.10.attn_norm.weight | Block 10 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 95 | blk.10.attn_output.weight | Block 10 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 96 | blk.10.attn_q.weight | Block 10 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 97 | blk.10.attn_v.weight | Block 10 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 98 | blk.10.ffn_down.weight | Block 10 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 99 | blk.10.ffn_gate.weight | Block 10 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 100 | blk.10.ffn_norm.weight | Block 10 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 101 | blk.10.ffn_up.weight | Block 10 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.10: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.10: 5.4800 bits + + +### Block 11 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 102 | blk.11.attn_k.weight | Block 11 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 103 | blk.11.attn_norm.weight | Block 11 
Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 104 | blk.11.attn_output.weight | Block 11 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 105 | blk.11.attn_q.weight | Block 11 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 106 | blk.11.attn_v.weight | Block 11 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 107 | blk.11.ffn_down.weight | Block 11 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 108 | blk.11.ffn_gate.weight | Block 11 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 109 | blk.11.ffn_norm.weight | Block 11 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 110 | blk.11.ffn_up.weight | Block 11 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.11: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.11: 5.1511 bits + + +### Block 12 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-------|--------:| +| 111 | blk.12.attn_k.weight | Block 12 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 112 | blk.12.attn_norm.weight | Block 12 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 113 | blk.12.attn_output.weight | Block 12 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 114 | blk.12.attn_q.weight | Block 12 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 115 | blk.12.attn_v.weight | Block 12 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 
5.5000 | +| 116 | blk.12.ffn_down.weight | Block 12 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 117 | blk.12.ffn_gate.weight | Block 12 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | IQ4_XS | 4.2500 | +| 118 | blk.12.ffn_norm.weight | Block 12 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 119 | blk.12.ffn_up.weight | Block 12 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.12: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.12: 5.1511 bits + + +### Block 13 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 120 | blk.13.attn_k.weight | Block 13 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 121 | blk.13.attn_norm.weight | Block 13 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 122 | blk.13.attn_output.weight | Block 13 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 123 | blk.13.attn_q.weight | Block 13 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 124 | blk.13.attn_v.weight | Block 13 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 125 | blk.13.ffn_down.weight | Block 13 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 126 | blk.13.ffn_gate.weight | Block 13 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 127 | blk.13.ffn_norm.weight | Block 13 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 128 | 
blk.13.ffn_up.weight | Block 13 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.13: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.13: 5.5011 bits + + +### Block 14 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 129 | blk.14.attn_k.weight | Block 14 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 130 | blk.14.attn_norm.weight | Block 14 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 131 | blk.14.attn_output.weight | Block 14 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 132 | blk.14.attn_q.weight | Block 14 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 133 | blk.14.attn_v.weight | Block 14 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 134 | blk.14.ffn_down.weight | Block 14 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 135 | blk.14.ffn_gate.weight | Block 14 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 136 | blk.14.ffn_norm.weight | Block 14 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 137 | blk.14.ffn_up.weight | Block 14 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.14: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.14: 5.5011 bits + + +### Block 15 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 138 | blk.15.attn_k.weight | Block 15 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 139 | blk.15.attn_norm.weight | Block 15 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 140 | blk.15.attn_output.weight | Block 15 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 141 | blk.15.attn_q.weight | Block 15 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 142 | blk.15.attn_v.weight | Block 15 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 143 | blk.15.ffn_down.weight | Block 15 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 144 | blk.15.ffn_gate.weight | Block 15 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 145 | blk.15.ffn_norm.weight | Block 15 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 146 | blk.15.ffn_up.weight | Block 15 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.15: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.15: 5.5011 bits + + +### Block 16 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 147 | blk.16.attn_k.weight | Block 16 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 148 | blk.16.attn_norm.weight | Block 16 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 149 | blk.16.attn_output.weight | Block 16 
Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 150 | blk.16.attn_q.weight | Block 16 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 151 | blk.16.attn_v.weight | Block 16 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 152 | blk.16.ffn_down.weight | Block 16 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 153 | blk.16.ffn_gate.weight | Block 16 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 154 | blk.16.ffn_norm.weight | Block 16 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 155 | blk.16.ffn_up.weight | Block 16 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.16: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.16: 5.5011 bits + + +### Block 17 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 156 | blk.17.attn_k.weight | Block 17 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 157 | blk.17.attn_norm.weight | Block 17 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 158 | blk.17.attn_output.weight | Block 17 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 159 | blk.17.attn_q.weight | Block 17 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 160 | blk.17.attn_v.weight | Block 17 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 161 | blk.17.ffn_down.weight | Block 17 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | 
Q5_K | 5.5000 | +| 162 | blk.17.ffn_gate.weight | Block 17 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 163 | blk.17.ffn_norm.weight | Block 17 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 164 | blk.17.ffn_up.weight | Block 17 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.17: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.17: 5.5011 bits + + +### Block 18 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 165 | blk.18.attn_k.weight | Block 18 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 166 | blk.18.attn_norm.weight | Block 18 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 167 | blk.18.attn_output.weight | Block 18 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 168 | blk.18.attn_q.weight | Block 18 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 169 | blk.18.attn_v.weight | Block 18 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 170 | blk.18.ffn_down.weight | Block 18 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 171 | blk.18.ffn_gate.weight | Block 18 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 172 | blk.18.ffn_norm.weight | Block 18 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 173 | blk.18.ffn_up.weight | Block 18 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in 
blk.18: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.18: 5.4800 bits + + +### Block 19 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 174 | blk.19.attn_k.weight | Block 19 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 175 | blk.19.attn_norm.weight | Block 19 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 176 | blk.19.attn_output.weight | Block 19 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 177 | blk.19.attn_q.weight | Block 19 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 178 | blk.19.attn_v.weight | Block 19 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 179 | blk.19.ffn_down.weight | Block 19 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 180 | blk.19.ffn_gate.weight | Block 19 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 181 | blk.19.ffn_norm.weight | Block 19 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 182 | blk.19.ffn_up.weight | Block 19 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.19: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.19: 5.4800 bits + + +### Block 20 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 183 | 
blk.20.attn_k.weight | Block 20 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 184 | blk.20.attn_norm.weight | Block 20 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 185 | blk.20.attn_output.weight | Block 20 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 186 | blk.20.attn_q.weight | Block 20 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 187 | blk.20.attn_v.weight | Block 20 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 188 | blk.20.ffn_down.weight | Block 20 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 189 | blk.20.ffn_gate.weight | Block 20 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 190 | blk.20.ffn_norm.weight | Block 20 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 191 | blk.20.ffn_up.weight | Block 20 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.20: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.20: 5.5011 bits + + +### Block 21 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 192 | blk.21.attn_k.weight | Block 21 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 193 | blk.21.attn_norm.weight | Block 21 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 194 | blk.21.attn_output.weight | Block 21 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 195 | blk.21.attn_q.weight | Block 21 Attention Query (W) | (~17M) 16777216 | 
4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 196 | blk.21.attn_v.weight | Block 21 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 197 | blk.21.ffn_down.weight | Block 21 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 198 | blk.21.ffn_gate.weight | Block 21 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 199 | blk.21.ffn_norm.weight | Block 21 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 200 | blk.21.ffn_up.weight | Block 21 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.21: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.21: 5.4800 bits + + +### Block 22 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 201 | blk.22.attn_k.weight | Block 22 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 202 | blk.22.attn_norm.weight | Block 22 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 203 | blk.22.attn_output.weight | Block 22 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 204 | blk.22.attn_q.weight | Block 22 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 205 | blk.22.attn_v.weight | Block 22 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 206 | blk.22.ffn_down.weight | Block 22 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 207 | blk.22.ffn_gate.weight | Block 22 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 208 | 
blk.22.ffn_norm.weight | Block 22 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 209 | blk.22.ffn_up.weight | Block 22 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.22: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.22: 5.5011 bits + + +### Block 23 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 210 | blk.23.attn_k.weight | Block 23 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 211 | blk.23.attn_norm.weight | Block 23 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 212 | blk.23.attn_output.weight | Block 23 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 213 | blk.23.attn_q.weight | Block 23 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 214 | blk.23.attn_v.weight | Block 23 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 215 | blk.23.ffn_down.weight | Block 23 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 216 | blk.23.ffn_gate.weight | Block 23 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 217 | blk.23.ffn_norm.weight | Block 23 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 218 | blk.23.ffn_up.weight | Block 23 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.23: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.23: 5.4800 bits + + +### Block 24 Tensor Group : ~199M 
Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 219 | blk.24.attn_k.weight | Block 24 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 220 | blk.24.attn_norm.weight | Block 24 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 221 | blk.24.attn_output.weight | Block 24 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 222 | blk.24.attn_q.weight | Block 24 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 223 | blk.24.attn_v.weight | Block 24 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 224 | blk.24.ffn_down.weight | Block 24 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 225 | blk.24.ffn_gate.weight | Block 24 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 226 | blk.24.ffn_norm.weight | Block 24 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 227 | blk.24.ffn_up.weight | Block 24 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.24: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.24: 5.5695 bits + + +### Block 25 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 228 | blk.25.attn_k.weight | Block 25 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 229 | blk.25.attn_norm.weight | Block 25 Attention 
Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 230 | blk.25.attn_output.weight | Block 25 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 231 | blk.25.attn_q.weight | Block 25 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 232 | blk.25.attn_v.weight | Block 25 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 233 | blk.25.ffn_down.weight | Block 25 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 234 | blk.25.ffn_gate.weight | Block 25 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 235 | blk.25.ffn_norm.weight | Block 25 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 236 | blk.25.ffn_up.weight | Block 25 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.25: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.25: 5.4800 bits + + +### Block 26 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 237 | blk.26.attn_k.weight | Block 26 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 238 | blk.26.attn_norm.weight | Block 26 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 239 | blk.26.attn_output.weight | Block 26 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 240 | blk.26.attn_q.weight | Block 26 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 241 | blk.26.attn_v.weight | Block 26 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 
242 | blk.26.ffn_down.weight | Block 26 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 243 | blk.26.ffn_gate.weight | Block 26 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 244 | blk.26.ffn_norm.weight | Block 26 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 245 | blk.26.ffn_up.weight | Block 26 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.26: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.26: 5.4800 bits + + +### Block 27 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 246 | blk.27.attn_k.weight | Block 27 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 247 | blk.27.attn_norm.weight | Block 27 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 248 | blk.27.attn_output.weight | Block 27 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 249 | blk.27.attn_q.weight | Block 27 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 250 | blk.27.attn_v.weight | Block 27 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 251 | blk.27.ffn_down.weight | Block 27 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 252 | blk.27.ffn_gate.weight | Block 27 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 253 | blk.27.ffn_norm.weight | Block 27 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 254 | blk.27.ffn_up.weight | Block 
27 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.27: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.27: 5.5011 bits + + +### Block 28 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 255 | blk.28.attn_k.weight | Block 28 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 256 | blk.28.attn_norm.weight | Block 28 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 257 | blk.28.attn_output.weight | Block 28 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 258 | blk.28.attn_q.weight | Block 28 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 259 | blk.28.attn_v.weight | Block 28 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 260 | blk.28.ffn_down.weight | Block 28 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 261 | blk.28.ffn_gate.weight | Block 28 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 262 | blk.28.ffn_norm.weight | Block 28 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 263 | blk.28.ffn_up.weight | Block 28 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.28: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.28: 5.5011 bits + + +### Block 29 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 264 | blk.29.attn_k.weight | Block 29 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 265 | blk.29.attn_norm.weight | Block 29 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 266 | blk.29.attn_output.weight | Block 29 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 267 | blk.29.attn_q.weight | Block 29 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 268 | blk.29.attn_v.weight | Block 29 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 269 | blk.29.ffn_down.weight | Block 29 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 270 | blk.29.ffn_gate.weight | Block 29 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 271 | blk.29.ffn_norm.weight | Block 29 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 272 | blk.29.ffn_up.weight | Block 29 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.29: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.29: 5.5011 bits + + +### Block 30 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 273 | blk.30.attn_k.weight | Block 30 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 274 | blk.30.attn_norm.weight | Block 30 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 275 | blk.30.attn_output.weight | Block 30 
Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 276 | blk.30.attn_q.weight | Block 30 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 277 | blk.30.attn_v.weight | Block 30 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 278 | blk.30.ffn_down.weight | Block 30 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 279 | blk.30.ffn_gate.weight | Block 30 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 280 | blk.30.ffn_norm.weight | Block 30 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 281 | blk.30.ffn_up.weight | Block 30 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.30: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.30: 5.5011 bits + + +### Block 31 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 282 | blk.31.attn_k.weight | Block 31 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 283 | blk.31.attn_norm.weight | Block 31 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 284 | blk.31.attn_output.weight | Block 31 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 285 | blk.31.attn_q.weight | Block 31 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 286 | blk.31.attn_v.weight | Block 31 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 287 | blk.31.ffn_down.weight | Block 31 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | 
Q5_K | 5.5000 | +| 288 | blk.31.ffn_gate.weight | Block 31 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 289 | blk.31.ffn_norm.weight | Block 31 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 290 | blk.31.ffn_up.weight | Block 31 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.31: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.31: 5.5906 bits + + +### Block 32 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 291 | blk.32.attn_k.weight | Block 32 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 292 | blk.32.attn_norm.weight | Block 32 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 293 | blk.32.attn_output.weight | Block 32 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 294 | blk.32.attn_q.weight | Block 32 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 295 | blk.32.attn_v.weight | Block 32 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 296 | blk.32.ffn_down.weight | Block 32 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 297 | blk.32.ffn_gate.weight | Block 32 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 298 | blk.32.ffn_norm.weight | Block 32 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 299 | blk.32.ffn_up.weight | Block 32 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in 
blk.32: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.32: 5.5011 bits + + +### Block 33 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 300 | blk.33.attn_k.weight | Block 33 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 301 | blk.33.attn_norm.weight | Block 33 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 302 | blk.33.attn_output.weight | Block 33 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 303 | blk.33.attn_q.weight | Block 33 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 304 | blk.33.attn_v.weight | Block 33 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 305 | blk.33.ffn_down.weight | Block 33 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 306 | blk.33.ffn_gate.weight | Block 33 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 307 | blk.33.ffn_norm.weight | Block 33 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 308 | blk.33.ffn_up.weight | Block 33 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.33: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.33: 5.5906 bits + + +### Block 34 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 309 | 
blk.34.attn_k.weight | Block 34 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 310 | blk.34.attn_norm.weight | Block 34 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 311 | blk.34.attn_output.weight | Block 34 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 312 | blk.34.attn_q.weight | Block 34 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 313 | blk.34.attn_v.weight | Block 34 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 314 | blk.34.ffn_down.weight | Block 34 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 315 | blk.34.ffn_gate.weight | Block 34 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 316 | blk.34.ffn_norm.weight | Block 34 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 317 | blk.34.ffn_up.weight | Block 34 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.34: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.34: 5.5011 bits + + +### Block 35 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 318 | blk.35.attn_k.weight | Block 35 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 319 | blk.35.attn_norm.weight | Block 35 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 320 | blk.35.attn_output.weight | Block 35 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 321 | blk.35.attn_q.weight | Block 35 Attention Query (W) | (~17M) 16777216 | 
4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 322 | blk.35.attn_v.weight | Block 35 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 323 | blk.35.ffn_down.weight | Block 35 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 324 | blk.35.ffn_gate.weight | Block 35 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 325 | blk.35.ffn_norm.weight | Block 35 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 326 | blk.35.ffn_up.weight | Block 35 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.35: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.35: 5.4800 bits + + +### Block 36 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 327 | blk.36.attn_k.weight | Block 36 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 328 | blk.36.attn_norm.weight | Block 36 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 329 | blk.36.attn_output.weight | Block 36 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 330 | blk.36.attn_q.weight | Block 36 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 331 | blk.36.attn_v.weight | Block 36 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 332 | blk.36.ffn_down.weight | Block 36 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 333 | blk.36.ffn_gate.weight | Block 36 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 334 | 
blk.36.ffn_norm.weight | Block 36 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 335 | blk.36.ffn_up.weight | Block 36 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.36: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.36: 5.4800 bits + + +### Block 37 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 336 | blk.37.attn_k.weight | Block 37 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 337 | blk.37.attn_norm.weight | Block 37 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 338 | blk.37.attn_output.weight | Block 37 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 339 | blk.37.attn_q.weight | Block 37 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 340 | blk.37.attn_v.weight | Block 37 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 341 | blk.37.ffn_down.weight | Block 37 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 342 | blk.37.ffn_gate.weight | Block 37 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 343 | blk.37.ffn_norm.weight | Block 37 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 344 | blk.37.ffn_up.weight | Block 37 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.37: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.37: 5.4800 bits + + +### Block 38 Tensor Group : ~199M 
Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 345 | blk.38.attn_k.weight | Block 38 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 346 | blk.38.attn_norm.weight | Block 38 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 347 | blk.38.attn_output.weight | Block 38 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 348 | blk.38.attn_q.weight | Block 38 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 349 | blk.38.attn_v.weight | Block 38 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 350 | blk.38.ffn_down.weight | Block 38 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 351 | blk.38.ffn_gate.weight | Block 38 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 352 | blk.38.ffn_norm.weight | Block 38 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 353 | blk.38.ffn_up.weight | Block 38 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.38: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.38: 5.4800 bits + + +### Block 39 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 354 | blk.39.attn_k.weight | Block 39 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 355 | blk.39.attn_norm.weight | Block 39 Attention 
Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 356 | blk.39.attn_output.weight | Block 39 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 357 | blk.39.attn_q.weight | Block 39 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 358 | blk.39.attn_v.weight | Block 39 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 359 | blk.39.ffn_down.weight | Block 39 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 360 | blk.39.ffn_gate.weight | Block 39 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 361 | blk.39.ffn_norm.weight | Block 39 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 362 | blk.39.ffn_up.weight | Block 39 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | + +- Total elements in blk.39: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.39: 5.5011 bits + + +Total BPW for granite-4.1-8b-Q5_K.gguf: 5.4999 bits diff --git a/scores/granite-4.1-8b-Q6_K.md b/scores/granite-4.1-8b-Q6_K.md new file mode 100644 index 0000000..a849ec8 --- /dev/null +++ b/scores/granite-4.1-8b-Q6_K.md @@ -0,0 +1,1247 @@ +# granite-4.1-8b-WIP/granite-4.1-8b-Q6_K.gguf - GGUF Internal File Dump + +- Endian: LITTLE endian + +## Key Value Metadata Store + +There are 42 key-value pairs in this file + +| POS | TYPE | Count | Key | Value | +|----:|:---------|-------:|:-----------------------------------------|:--------------------------------------------------------------------| +| 1 | UINT32 | 1 | GGUF.version | 3 | +| 2 | UINT64 | 1 | GGUF.tensor_count | 363 | +| 3 | UINT64 | 1 | GGUF.kv_count | 39 | +| 4 | STRING | 1 | general.architecture | `granite` | +| 5 | STRING | 1 | general.type | `model` | +| 6 | STRING | 1 | general.name | `Granite 4.1 8b` | +| 7 | STRING | 1 
| general.basename | `granite-4.1` | +| 8 | STRING | 1 | general.size_label | `8B` | +| 9 | STRING | 1 | general.license | `apache-2.0` | +| 10 | [STRING] | 2 | general.tags | [ `language`, `granite-4.1` ] | +| 11 | UINT32 | 1 | granite.block_count | 40 | +| 12 | UINT32 | 1 | granite.context_length | 131072 | +| 13 | UINT32 | 1 | granite.embedding_length | 4096 | +| 14 | UINT32 | 1 | granite.feed_forward_length | 12800 | +| 15 | UINT32 | 1 | granite.attention.head_count | 32 | +| 16 | UINT32 | 1 | granite.attention.head_count_kv | 8 | +| 17 | FLOAT32 | 1 | granite.rope.freq_base | 10000000.0 | +| 18 | FLOAT32 | 1 | granite.attention.layer_norm_rms_epsilon | 1e-05 | +| 19 | UINT32 | 1 | granite.vocab_size | 100352 | +| 20 | UINT32 | 1 | granite.rope.dimension_count | 128 | +| 21 | FLOAT32 | 1 | granite.attention.scale | 0.0078125 | +| 22 | FLOAT32 | 1 | granite.embedding_scale | 12.0 | +| 23 | FLOAT32 | 1 | granite.residual_scale | 0.22 | +| 24 | FLOAT32 | 1 | granite.logit_scale | 16.0 | +| 25 | STRING | 1 | tokenizer.ggml.model | `gpt2` | +| 26 | STRING | 1 | tokenizer.ggml.pre | `granite-docling` | +| 27 | [STRING] | 100352 | tokenizer.ggml.tokens | [ `!`, `"`, `#`, `$`, `%`, ... ] | +| 28 | [INT32] | 100352 | tokenizer.ggml.token_type | [ 1, 1, 1, 1, 1, 1, 1, ... ] | +| 29 | [STRING] | 100000 | tokenizer.ggml.merges | [ `Ġ Ġ`, `ĠĠ ĠĠ`, `i n`, `Ġ t`, `ĠĠĠĠ ĠĠĠĠ`, ... 
] | +| 30 | UINT32 | 1 | tokenizer.ggml.bos_token_id | 100257 | +| 31 | UINT32 | 1 | tokenizer.ggml.eos_token_id | 100257 | +| 32 | UINT32 | 1 | tokenizer.ggml.unknown_token_id | 100269 | +| 33 | UINT32 | 1 | tokenizer.ggml.padding_token_id | 100256 | +| 34 | BOOL | 1 | tokenizer.ggml.add_bos_token | False | +| 35 | STRING | 1 | tokenizer.chat_template | `{%- set tools_system_message_p`...`end_of_role|>' }}{%- endif %}` | +| 36 | BOOL | 1 | tokenizer.ggml.add_space_prefix | False | +| 37 | UINT32 | 1 | general.quantization_version | 2 | +| 38 | UINT32 | 1 | general.file_type | 18 | +| 39 | STRING | 1 | quantize.imatrix.file | `imatrix/imatrix-granite-4.1-8b-medium.gguf` | +| 40 | STRING | 1 | quantize.imatrix.dataset | `../../datasets/imatrix/combined_all_medium.txt` | +| 41 | UINT32 | 1 | quantize.imatrix.entries_count | 280 | +| 42 | UINT32 | 1 | quantize.imatrix.chunks_count | 3601 | + +## Tensors Overview ~9B Elements + +Total number of elements in all tensors: 8791592960 Elements + +- [granite-4.1-8b-WIP/granite-4.1-8b-Q6\_K.gguf - GGUF Internal File Dump](#granite-41-8b-wipgranite-41-8b-q6_kgguf---gguf-internal-file-dump) + - [Key Value Metadata Store](#key-value-metadata-store) + - [Tensors Overview ~9B Elements](#tensors-overview-9b-elements) + - [Tensor Data Offset](#tensor-data-offset) + - [Base Tensor Group : ~822M Elements](#base-tensor-group--822m-elements) + - [Block 0 Tensor Group : ~199M Elements](#block-0-tensor-group--199m-elements) + - [Block 1 Tensor Group : ~199M Elements](#block-1-tensor-group--199m-elements) + - [Block 2 Tensor Group : ~199M Elements](#block-2-tensor-group--199m-elements) + - [Block 3 Tensor Group : ~199M Elements](#block-3-tensor-group--199m-elements) + - [Block 4 Tensor Group : ~199M Elements](#block-4-tensor-group--199m-elements) + - [Block 5 Tensor Group : ~199M Elements](#block-5-tensor-group--199m-elements) + - [Block 6 Tensor Group : ~199M Elements](#block-6-tensor-group--199m-elements) + - [Block 7 Tensor Group : 
~199M Elements](#block-7-tensor-group--199m-elements) + - [Block 8 Tensor Group : ~199M Elements](#block-8-tensor-group--199m-elements) + - [Block 9 Tensor Group : ~199M Elements](#block-9-tensor-group--199m-elements) + - [Block 10 Tensor Group : ~199M Elements](#block-10-tensor-group--199m-elements) + - [Block 11 Tensor Group : ~199M Elements](#block-11-tensor-group--199m-elements) + - [Block 12 Tensor Group : ~199M Elements](#block-12-tensor-group--199m-elements) + - [Block 13 Tensor Group : ~199M Elements](#block-13-tensor-group--199m-elements) + - [Block 14 Tensor Group : ~199M Elements](#block-14-tensor-group--199m-elements) + - [Block 15 Tensor Group : ~199M Elements](#block-15-tensor-group--199m-elements) + - [Block 16 Tensor Group : ~199M Elements](#block-16-tensor-group--199m-elements) + - [Block 17 Tensor Group : ~199M Elements](#block-17-tensor-group--199m-elements) + - [Block 18 Tensor Group : ~199M Elements](#block-18-tensor-group--199m-elements) + - [Block 19 Tensor Group : ~199M Elements](#block-19-tensor-group--199m-elements) + - [Block 20 Tensor Group : ~199M Elements](#block-20-tensor-group--199m-elements) + - [Block 21 Tensor Group : ~199M Elements](#block-21-tensor-group--199m-elements) + - [Block 22 Tensor Group : ~199M Elements](#block-22-tensor-group--199m-elements) + - [Block 23 Tensor Group : ~199M Elements](#block-23-tensor-group--199m-elements) + - [Block 24 Tensor Group : ~199M Elements](#block-24-tensor-group--199m-elements) + - [Block 25 Tensor Group : ~199M Elements](#block-25-tensor-group--199m-elements) + - [Block 26 Tensor Group : ~199M Elements](#block-26-tensor-group--199m-elements) + - [Block 27 Tensor Group : ~199M Elements](#block-27-tensor-group--199m-elements) + - [Block 28 Tensor Group : ~199M Elements](#block-28-tensor-group--199m-elements) + - [Block 29 Tensor Group : ~199M Elements](#block-29-tensor-group--199m-elements) + - [Block 30 Tensor Group : ~199M Elements](#block-30-tensor-group--199m-elements) + - [Block 31 
Tensor Group : ~199M Elements](#block-31-tensor-group--199m-elements) + - [Block 32 Tensor Group : ~199M Elements](#block-32-tensor-group--199m-elements) + - [Block 33 Tensor Group : ~199M Elements](#block-33-tensor-group--199m-elements) + - [Block 34 Tensor Group : ~199M Elements](#block-34-tensor-group--199m-elements) + - [Block 35 Tensor Group : ~199M Elements](#block-35-tensor-group--199m-elements) + - [Block 36 Tensor Group : ~199M Elements](#block-36-tensor-group--199m-elements) + - [Block 37 Tensor Group : ~199M Elements](#block-37-tensor-group--199m-elements) + - [Block 38 Tensor Group : ~199M Elements](#block-38-tensor-group--199m-elements) + - [Block 39 Tensor Group : ~199M Elements](#block-39-tensor-group--199m-elements) + +### Tensor Data Offset + +This table contains the offset and data segment relative to start of file + +| T_ID | Tensor Layer Name | Data Offset (B) | Data Size (B) | +|-----:|:--------------------------|-----------------:|-----------------:| +| 0 | output.weight | 0x367400 | 0x1a080000 | +| 1 | output_norm.weight | 0x1a3e7400 | 0x4000 | +| 2 | token_embd.weight | 0x1a3eb400 | 0x14190000 | +| 3 | blk.0.attn_k.weight | 0x2e57b400 | 0x240000 | +| 4 | blk.0.attn_norm.weight | 0x2e7bb400 | 0x4000 | +| 5 | blk.0.attn_output.weight | 0x2e7bf400 | 0xb00000 | +| 6 | blk.0.attn_q.weight | 0x2f2bf400 | 0x900000 | +| 7 | blk.0.attn_v.weight | 0x2fbbf400 | 0x348000 | +| 8 | blk.0.ffn_down.weight | 0x2ff07400 | 0x2260000 | +| 9 | blk.0.ffn_gate.weight | 0x32167400 | 0x2260000 | +| 10 | blk.0.ffn_norm.weight | 0x343c7400 | 0x4000 | +| 11 | blk.0.ffn_up.weight | 0x343cb400 | 0x2904000 | +| 12 | blk.1.attn_k.weight | 0x36ccf400 | 0x2c0000 | +| 13 | blk.1.attn_norm.weight | 0x36f8f400 | 0x4000 | +| 14 | blk.1.attn_output.weight | 0x36f93400 | 0xd20000 | +| 15 | blk.1.attn_q.weight | 0x37cb3400 | 0xb00000 | +| 16 | blk.1.attn_v.weight | 0x387b3400 | 0x440000 | +| 17 | blk.1.ffn_down.weight | 0x38bf3400 | 0x2904000 | +| 18 | blk.1.ffn_gate.weight | 
0x3b4f7400 | 0x2904000 | +| 19 | blk.1.ffn_norm.weight | 0x3ddfb400 | 0x4000 | +| 20 | blk.1.ffn_up.weight | 0x3ddff400 | 0x2904000 | +| 21 | blk.2.attn_k.weight | 0x40703400 | 0x2c0000 | +| 22 | blk.2.attn_norm.weight | 0x409c3400 | 0x4000 | +| 23 | blk.2.attn_output.weight | 0x409c7400 | 0xd20000 | +| 24 | blk.2.attn_q.weight | 0x416e7400 | 0xb00000 | +| 25 | blk.2.attn_v.weight | 0x421e7400 | 0x440000 | +| 26 | blk.2.ffn_down.weight | 0x42627400 | 0x2904000 | +| 27 | blk.2.ffn_gate.weight | 0x44f2b400 | 0x2904000 | +| 28 | blk.2.ffn_norm.weight | 0x4782f400 | 0x4000 | +| 29 | blk.2.ffn_up.weight | 0x47833400 | 0x2904000 | +| 30 | blk.3.attn_k.weight | 0x4a137400 | 0x2c0000 | +| 31 | blk.3.attn_norm.weight | 0x4a3f7400 | 0x4000 | +| 32 | blk.3.attn_output.weight | 0x4a3fb400 | 0xd20000 | +| 33 | blk.3.attn_q.weight | 0x4b11b400 | 0xb00000 | +| 34 | blk.3.attn_v.weight | 0x4bc1b400 | 0x440000 | +| 35 | blk.3.ffn_down.weight | 0x4c05b400 | 0x2260000 | +| 36 | blk.3.ffn_gate.weight | 0x4e2bb400 | 0x2904000 | +| 37 | blk.3.ffn_norm.weight | 0x50bbf400 | 0x4000 | +| 38 | blk.3.ffn_up.weight | 0x50bc3400 | 0x2904000 | +| 39 | blk.4.attn_k.weight | 0x534c7400 | 0x2c0000 | +| 40 | blk.4.attn_norm.weight | 0x53787400 | 0x4000 | +| 41 | blk.4.attn_output.weight | 0x5378b400 | 0xd20000 | +| 42 | blk.4.attn_q.weight | 0x544ab400 | 0xd20000 | +| 43 | blk.4.attn_v.weight | 0x551cb400 | 0x440000 | +| 44 | blk.4.ffn_down.weight | 0x5560b400 | 0x2260000 | +| 45 | blk.4.ffn_gate.weight | 0x5786b400 | 0x2904000 | +| 46 | blk.4.ffn_norm.weight | 0x5a16f400 | 0x4000 | +| 47 | blk.4.ffn_up.weight | 0x5a173400 | 0x2904000 | +| 48 | blk.5.attn_k.weight | 0x5ca77400 | 0x2c0000 | +| 49 | blk.5.attn_norm.weight | 0x5cd37400 | 0x4000 | +| 50 | blk.5.attn_output.weight | 0x5cd3b400 | 0xd20000 | +| 51 | blk.5.attn_q.weight | 0x5da5b400 | 0xb00000 | +| 52 | blk.5.attn_v.weight | 0x5e55b400 | 0x440000 | +| 53 | blk.5.ffn_down.weight | 0x5e99b400 | 0x2260000 | +| 54 | blk.5.ffn_gate.weight | 
0x60bfb400 | 0x2904000 | +| 55 | blk.5.ffn_norm.weight | 0x634ff400 | 0x4000 | +| 56 | blk.5.ffn_up.weight | 0x63503400 | 0x2904000 | +| 57 | blk.6.attn_k.weight | 0x65e07400 | 0x2c0000 | +| 58 | blk.6.attn_norm.weight | 0x660c7400 | 0x4000 | +| 59 | blk.6.attn_output.weight | 0x660cb400 | 0xd20000 | +| 60 | blk.6.attn_q.weight | 0x66deb400 | 0xb00000 | +| 61 | blk.6.attn_v.weight | 0x678eb400 | 0x440000 | +| 62 | blk.6.ffn_down.weight | 0x67d2b400 | 0x2260000 | +| 63 | blk.6.ffn_gate.weight | 0x69f8b400 | 0x2904000 | +| 64 | blk.6.ffn_norm.weight | 0x6c88f400 | 0x4000 | +| 65 | blk.6.ffn_up.weight | 0x6c893400 | 0x2904000 | +| 66 | blk.7.attn_k.weight | 0x6f197400 | 0x348000 | +| 67 | blk.7.attn_norm.weight | 0x6f4df400 | 0x4000 | +| 68 | blk.7.attn_output.weight | 0x6f4e3400 | 0xd20000 | +| 69 | blk.7.attn_q.weight | 0x70203400 | 0xd20000 | +| 70 | blk.7.attn_v.weight | 0x70f23400 | 0x440000 | +| 71 | blk.7.ffn_down.weight | 0x71363400 | 0x2904000 | +| 72 | blk.7.ffn_gate.weight | 0x73c67400 | 0x2904000 | +| 73 | blk.7.ffn_norm.weight | 0x7656b400 | 0x4000 | +| 74 | blk.7.ffn_up.weight | 0x7656f400 | 0x2904000 | +| 75 | blk.8.attn_k.weight | 0x78e73400 | 0x2c0000 | +| 76 | blk.8.attn_norm.weight | 0x79133400 | 0x4000 | +| 77 | blk.8.attn_output.weight | 0x79137400 | 0xd20000 | +| 78 | blk.8.attn_q.weight | 0x79e57400 | 0xb00000 | +| 79 | blk.8.attn_v.weight | 0x7a957400 | 0x440000 | +| 80 | blk.8.ffn_down.weight | 0x7ad97400 | 0x2260000 | +| 81 | blk.8.ffn_gate.weight | 0x7cff7400 | 0x2260000 | +| 82 | blk.8.ffn_norm.weight | 0x7f257400 | 0x4000 | +| 83 | blk.8.ffn_up.weight | 0x7f25b400 | 0x2904000 | +| 84 | blk.9.attn_k.weight | 0x81b5f400 | 0x2c0000 | +| 85 | blk.9.attn_norm.weight | 0x81e1f400 | 0x4000 | +| 86 | blk.9.attn_output.weight | 0x81e23400 | 0xd20000 | +| 87 | blk.9.attn_q.weight | 0x82b43400 | 0xb00000 | +| 88 | blk.9.attn_v.weight | 0x83643400 | 0x440000 | +| 89 | blk.9.ffn_down.weight | 0x83a83400 | 0x2260000 | +| 90 | blk.9.ffn_gate.weight | 
0x85ce3400 | 0x2260000 | +| 91 | blk.9.ffn_norm.weight | 0x87f43400 | 0x4000 | +| 92 | blk.9.ffn_up.weight | 0x87f47400 | 0x2904000 | +| 93 | blk.10.attn_k.weight | 0x8a84b400 | 0x2c0000 | +| 94 | blk.10.attn_norm.weight | 0x8ab0b400 | 0x4000 | +| 95 | blk.10.attn_output.weight | 0x8ab0f400 | 0xd20000 | +| 96 | blk.10.attn_q.weight | 0x8b82f400 | 0xb00000 | +| 97 | blk.10.attn_v.weight | 0x8c32f400 | 0x440000 | +| 98 | blk.10.ffn_down.weight | 0x8c76f400 | 0x2260000 | +| 99 | blk.10.ffn_gate.weight | 0x8e9cf400 | 0x2260000 | +| 100 | blk.10.ffn_norm.weight | 0x90c2f400 | 0x4000 | +| 101 | blk.10.ffn_up.weight | 0x90c33400 | 0x2904000 | +| 102 | blk.11.attn_k.weight | 0x93537400 | 0x2c0000 | +| 103 | blk.11.attn_norm.weight | 0x937f7400 | 0x4000 | +| 104 | blk.11.attn_output.weight | 0x937fb400 | 0xd20000 | +| 105 | blk.11.attn_q.weight | 0x9451b400 | 0xb00000 | +| 106 | blk.11.attn_v.weight | 0x9501b400 | 0x440000 | +| 107 | blk.11.ffn_down.weight | 0x9545b400 | 0x2904000 | +| 108 | blk.11.ffn_gate.weight | 0x97d5f400 | 0x2260000 | +| 109 | blk.11.ffn_norm.weight | 0x99fbf400 | 0x4000 | +| 110 | blk.11.ffn_up.weight | 0x99fc3400 | 0x2904000 | +| 111 | blk.12.attn_k.weight | 0x9c8c7400 | 0x2c0000 | +| 112 | blk.12.attn_norm.weight | 0x9cb87400 | 0x4000 | +| 113 | blk.12.attn_output.weight | 0x9cb8b400 | 0xd20000 | +| 114 | blk.12.attn_q.weight | 0x9d8ab400 | 0xb00000 | +| 115 | blk.12.attn_v.weight | 0x9e3ab400 | 0x440000 | +| 116 | blk.12.ffn_down.weight | 0x9e7eb400 | 0x2904000 | +| 117 | blk.12.ffn_gate.weight | 0xa10ef400 | 0x2260000 | +| 118 | blk.12.ffn_norm.weight | 0xa334f400 | 0x4000 | +| 119 | blk.12.ffn_up.weight | 0xa3353400 | 0x2904000 | +| 120 | blk.13.attn_k.weight | 0xa5c57400 | 0x2c0000 | +| 121 | blk.13.attn_norm.weight | 0xa5f17400 | 0x4000 | +| 122 | blk.13.attn_output.weight | 0xa5f1b400 | 0xd20000 | +| 123 | blk.13.attn_q.weight | 0xa6c3b400 | 0xb00000 | +| 124 | blk.13.attn_v.weight | 0xa773b400 | 0x440000 | +| 125 | blk.13.ffn_down.weight | 
0xa7b7b400 | 0x2904000 | +| 126 | blk.13.ffn_gate.weight | 0xaa47f400 | 0x2260000 | +| 127 | blk.13.ffn_norm.weight | 0xac6df400 | 0x4000 | +| 128 | blk.13.ffn_up.weight | 0xac6e3400 | 0x2904000 | +| 129 | blk.14.attn_k.weight | 0xaefe7400 | 0x2c0000 | +| 130 | blk.14.attn_norm.weight | 0xaf2a7400 | 0x4000 | +| 131 | blk.14.attn_output.weight | 0xaf2ab400 | 0xd20000 | +| 132 | blk.14.attn_q.weight | 0xaffcb400 | 0xb00000 | +| 133 | blk.14.attn_v.weight | 0xb0acb400 | 0x440000 | +| 134 | blk.14.ffn_down.weight | 0xb0f0b400 | 0x2904000 | +| 135 | blk.14.ffn_gate.weight | 0xb380f400 | 0x2904000 | +| 136 | blk.14.ffn_norm.weight | 0xb6113400 | 0x4000 | +| 137 | blk.14.ffn_up.weight | 0xb6117400 | 0x2904000 | +| 138 | blk.15.attn_k.weight | 0xb8a1b400 | 0x2c0000 | +| 139 | blk.15.attn_norm.weight | 0xb8cdb400 | 0x4000 | +| 140 | blk.15.attn_output.weight | 0xb8cdf400 | 0xd20000 | +| 141 | blk.15.attn_q.weight | 0xb99ff400 | 0xd20000 | +| 142 | blk.15.attn_v.weight | 0xba71f400 | 0x440000 | +| 143 | blk.15.ffn_down.weight | 0xbab5f400 | 0x2904000 | +| 144 | blk.15.ffn_gate.weight | 0xbd463400 | 0x2904000 | +| 145 | blk.15.ffn_norm.weight | 0xbfd67400 | 0x4000 | +| 146 | blk.15.ffn_up.weight | 0xbfd6b400 | 0x2904000 | +| 147 | blk.16.attn_k.weight | 0xc266f400 | 0x348000 | +| 148 | blk.16.attn_norm.weight | 0xc29b7400 | 0x4000 | +| 149 | blk.16.attn_output.weight | 0xc29bb400 | 0xd20000 | +| 150 | blk.16.attn_q.weight | 0xc36db400 | 0xd20000 | +| 151 | blk.16.attn_v.weight | 0xc43fb400 | 0x440000 | +| 152 | blk.16.ffn_down.weight | 0xc483b400 | 0x2904000 | +| 153 | blk.16.ffn_gate.weight | 0xc713f400 | 0x2904000 | +| 154 | blk.16.ffn_norm.weight | 0xc9a43400 | 0x4000 | +| 155 | blk.16.ffn_up.weight | 0xc9a47400 | 0x2904000 | +| 156 | blk.17.attn_k.weight | 0xcc34b400 | 0x2c0000 | +| 157 | blk.17.attn_norm.weight | 0xcc60b400 | 0x4000 | +| 158 | blk.17.attn_output.weight | 0xcc60f400 | 0xd20000 | +| 159 | blk.17.attn_q.weight | 0xcd32f400 | 0xd20000 | +| 160 | 
blk.17.attn_v.weight | 0xce04f400 | 0x440000 | +| 161 | blk.17.ffn_down.weight | 0xce48f400 | 0x2904000 | +| 162 | blk.17.ffn_gate.weight | 0xd0d93400 | 0x2904000 | +| 163 | blk.17.ffn_norm.weight | 0xd3697400 | 0x4000 | +| 164 | blk.17.ffn_up.weight | 0xd369b400 | 0x2904000 | +| 165 | blk.18.attn_k.weight | 0xd5f9f400 | 0x2c0000 | +| 166 | blk.18.attn_norm.weight | 0xd625f400 | 0x4000 | +| 167 | blk.18.attn_output.weight | 0xd6263400 | 0xd20000 | +| 168 | blk.18.attn_q.weight | 0xd6f83400 | 0xb00000 | +| 169 | blk.18.attn_v.weight | 0xd7a83400 | 0x440000 | +| 170 | blk.18.ffn_down.weight | 0xd7ec3400 | 0x2904000 | +| 171 | blk.18.ffn_gate.weight | 0xda7c7400 | 0x2904000 | +| 172 | blk.18.ffn_norm.weight | 0xdd0cb400 | 0x4000 | +| 173 | blk.18.ffn_up.weight | 0xdd0cf400 | 0x2904000 | +| 174 | blk.19.attn_k.weight | 0xdf9d3400 | 0x2c0000 | +| 175 | blk.19.attn_norm.weight | 0xdfc93400 | 0x4000 | +| 176 | blk.19.attn_output.weight | 0xdfc97400 | 0xd20000 | +| 177 | blk.19.attn_q.weight | 0xe09b7400 | 0xb00000 | +| 178 | blk.19.attn_v.weight | 0xe14b7400 | 0x440000 | +| 179 | blk.19.ffn_down.weight | 0xe18f7400 | 0x2904000 | +| 180 | blk.19.ffn_gate.weight | 0xe41fb400 | 0x2904000 | +| 181 | blk.19.ffn_norm.weight | 0xe6aff400 | 0x4000 | +| 182 | blk.19.ffn_up.weight | 0xe6b03400 | 0x2904000 | +| 183 | blk.20.attn_k.weight | 0xe9407400 | 0x2c0000 | +| 184 | blk.20.attn_norm.weight | 0xe96c7400 | 0x4000 | +| 185 | blk.20.attn_output.weight | 0xe96cb400 | 0xd20000 | +| 186 | blk.20.attn_q.weight | 0xea3eb400 | 0xd20000 | +| 187 | blk.20.attn_v.weight | 0xeb10b400 | 0x440000 | +| 188 | blk.20.ffn_down.weight | 0xeb54b400 | 0x2904000 | +| 189 | blk.20.ffn_gate.weight | 0xede4f400 | 0x2904000 | +| 190 | blk.20.ffn_norm.weight | 0xf0753400 | 0x4000 | +| 191 | blk.20.ffn_up.weight | 0xf0757400 | 0x2904000 | +| 192 | blk.21.attn_k.weight | 0xf305b400 | 0x2c0000 | +| 193 | blk.21.attn_norm.weight | 0xf331b400 | 0x4000 | +| 194 | blk.21.attn_output.weight | 0xf331f400 | 
0xd20000 | +| 195 | blk.21.attn_q.weight | 0xf403f400 | 0xb00000 | +| 196 | blk.21.attn_v.weight | 0xf4b3f400 | 0x440000 | +| 197 | blk.21.ffn_down.weight | 0xf4f7f400 | 0x2904000 | +| 198 | blk.21.ffn_gate.weight | 0xf7883400 | 0x2904000 | +| 199 | blk.21.ffn_norm.weight | 0xfa187400 | 0x4000 | +| 200 | blk.21.ffn_up.weight | 0xfa18b400 | 0x2904000 | +| 201 | blk.22.attn_k.weight | 0xfca8f400 | 0x2c0000 | +| 202 | blk.22.attn_norm.weight | 0xfcd4f400 | 0x4000 | +| 203 | blk.22.attn_output.weight | 0xfcd53400 | 0xd20000 | +| 204 | blk.22.attn_q.weight | 0xfda73400 | 0xd20000 | +| 205 | blk.22.attn_v.weight | 0xfe793400 | 0x440000 | +| 206 | blk.22.ffn_down.weight | 0xfebd3400 | 0x2904000 | +| 207 | blk.22.ffn_gate.weight | 0x1014d7400 | 0x2904000 | +| 208 | blk.22.ffn_norm.weight | 0x103ddb400 | 0x4000 | +| 209 | blk.22.ffn_up.weight | 0x103ddf400 | 0x2904000 | +| 210 | blk.23.attn_k.weight | 0x1066e3400 | 0x2c0000 | +| 211 | blk.23.attn_norm.weight | 0x1069a3400 | 0x4000 | +| 212 | blk.23.attn_output.weight | 0x1069a7400 | 0xd20000 | +| 213 | blk.23.attn_q.weight | 0x1076c7400 | 0xd20000 | +| 214 | blk.23.attn_v.weight | 0x1083e7400 | 0x440000 | +| 215 | blk.23.ffn_down.weight | 0x108827400 | 0x2904000 | +| 216 | blk.23.ffn_gate.weight | 0x10b12b400 | 0x2904000 | +| 217 | blk.23.ffn_norm.weight | 0x10da2f400 | 0x4000 | +| 218 | blk.23.ffn_up.weight | 0x10da33400 | 0x2904000 | +| 219 | blk.24.attn_k.weight | 0x110337400 | 0x2c0000 | +| 220 | blk.24.attn_norm.weight | 0x1105f7400 | 0x4000 | +| 221 | blk.24.attn_output.weight | 0x1105fb400 | 0xd20000 | +| 222 | blk.24.attn_q.weight | 0x11131b400 | 0xb00000 | +| 223 | blk.24.attn_v.weight | 0x111e1b400 | 0x440000 | +| 224 | blk.24.ffn_down.weight | 0x11225b400 | 0x2904000 | +| 225 | blk.24.ffn_gate.weight | 0x114b5f400 | 0x2904000 | +| 226 | blk.24.ffn_norm.weight | 0x117463400 | 0x4000 | +| 227 | blk.24.ffn_up.weight | 0x117467400 | 0x2904000 | +| 228 | blk.25.attn_k.weight | 0x119d6b400 | 0x2c0000 | +| 229 | 
blk.25.attn_norm.weight | 0x11a02b400 | 0x4000 | +| 230 | blk.25.attn_output.weight | 0x11a02f400 | 0xd20000 | +| 231 | blk.25.attn_q.weight | 0x11ad4f400 | 0xd20000 | +| 232 | blk.25.attn_v.weight | 0x11ba6f400 | 0x440000 | +| 233 | blk.25.ffn_down.weight | 0x11beaf400 | 0x2904000 | +| 234 | blk.25.ffn_gate.weight | 0x11e7b3400 | 0x2904000 | +| 235 | blk.25.ffn_norm.weight | 0x1210b7400 | 0x4000 | +| 236 | blk.25.ffn_up.weight | 0x1210bb400 | 0x2904000 | +| 237 | blk.26.attn_k.weight | 0x1239bf400 | 0x2c0000 | +| 238 | blk.26.attn_norm.weight | 0x123c7f400 | 0x4000 | +| 239 | blk.26.attn_output.weight | 0x123c83400 | 0xd20000 | +| 240 | blk.26.attn_q.weight | 0x1249a3400 | 0xb00000 | +| 241 | blk.26.attn_v.weight | 0x1254a3400 | 0x440000 | +| 242 | blk.26.ffn_down.weight | 0x1258e3400 | 0x2904000 | +| 243 | blk.26.ffn_gate.weight | 0x1281e7400 | 0x2904000 | +| 244 | blk.26.ffn_norm.weight | 0x12aaeb400 | 0x4000 | +| 245 | blk.26.ffn_up.weight | 0x12aaef400 | 0x2904000 | +| 246 | blk.27.attn_k.weight | 0x12d3f3400 | 0x2c0000 | +| 247 | blk.27.attn_norm.weight | 0x12d6b3400 | 0x4000 | +| 248 | blk.27.attn_output.weight | 0x12d6b7400 | 0xd20000 | +| 249 | blk.27.attn_q.weight | 0x12e3d7400 | 0xd20000 | +| 250 | blk.27.attn_v.weight | 0x12f0f7400 | 0x440000 | +| 251 | blk.27.ffn_down.weight | 0x12f537400 | 0x2904000 | +| 252 | blk.27.ffn_gate.weight | 0x131e3b400 | 0x2904000 | +| 253 | blk.27.ffn_norm.weight | 0x13473f400 | 0x4000 | +| 254 | blk.27.ffn_up.weight | 0x134743400 | 0x2904000 | +| 255 | blk.28.attn_k.weight | 0x137047400 | 0x348000 | +| 256 | blk.28.attn_norm.weight | 0x13738f400 | 0x4000 | +| 257 | blk.28.attn_output.weight | 0x137393400 | 0xd20000 | +| 258 | blk.28.attn_q.weight | 0x1380b3400 | 0xd20000 | +| 259 | blk.28.attn_v.weight | 0x138dd3400 | 0x440000 | +| 260 | blk.28.ffn_down.weight | 0x139213400 | 0x2904000 | +| 261 | blk.28.ffn_gate.weight | 0x13bb17400 | 0x2904000 | +| 262 | blk.28.ffn_norm.weight | 0x13e41b400 | 0x4000 | +| 263 | 
blk.28.ffn_up.weight | 0x13e41f400 | 0x2904000 | +| 264 | blk.29.attn_k.weight | 0x140d23400 | 0x2c0000 | +| 265 | blk.29.attn_norm.weight | 0x140fe3400 | 0x4000 | +| 266 | blk.29.attn_output.weight | 0x140fe7400 | 0xd20000 | +| 267 | blk.29.attn_q.weight | 0x141d07400 | 0xd20000 | +| 268 | blk.29.attn_v.weight | 0x142a27400 | 0x440000 | +| 269 | blk.29.ffn_down.weight | 0x142e67400 | 0x2904000 | +| 270 | blk.29.ffn_gate.weight | 0x14576b400 | 0x2904000 | +| 271 | blk.29.ffn_norm.weight | 0x14806f400 | 0x4000 | +| 272 | blk.29.ffn_up.weight | 0x148073400 | 0x2904000 | +| 273 | blk.30.attn_k.weight | 0x14a977400 | 0x348000 | +| 274 | blk.30.attn_norm.weight | 0x14acbf400 | 0x4000 | +| 275 | blk.30.attn_output.weight | 0x14acc3400 | 0xd20000 | +| 276 | blk.30.attn_q.weight | 0x14b9e3400 | 0xd20000 | +| 277 | blk.30.attn_v.weight | 0x14c703400 | 0x440000 | +| 278 | blk.30.ffn_down.weight | 0x14cb43400 | 0x2904000 | +| 279 | blk.30.ffn_gate.weight | 0x14f447400 | 0x2904000 | +| 280 | blk.30.ffn_norm.weight | 0x151d4b400 | 0x4000 | +| 281 | blk.30.ffn_up.weight | 0x151d4f400 | 0x2904000 | +| 282 | blk.31.attn_k.weight | 0x154653400 | 0x2c0000 | +| 283 | blk.31.attn_norm.weight | 0x154913400 | 0x4000 | +| 284 | blk.31.attn_output.weight | 0x154917400 | 0xd20000 | +| 285 | blk.31.attn_q.weight | 0x155637400 | 0xd20000 | +| 286 | blk.31.attn_v.weight | 0x156357400 | 0x440000 | +| 287 | blk.31.ffn_down.weight | 0x156797400 | 0x2904000 | +| 288 | blk.31.ffn_gate.weight | 0x15909b400 | 0x2904000 | +| 289 | blk.31.ffn_norm.weight | 0x15b99f400 | 0x4000 | +| 290 | blk.31.ffn_up.weight | 0x15b9a3400 | 0x2904000 | +| 291 | blk.32.attn_k.weight | 0x15e2a7400 | 0x2c0000 | +| 292 | blk.32.attn_norm.weight | 0x15e567400 | 0x4000 | +| 293 | blk.32.attn_output.weight | 0x15e56b400 | 0xd20000 | +| 294 | blk.32.attn_q.weight | 0x15f28b400 | 0xd20000 | +| 295 | blk.32.attn_v.weight | 0x15ffab400 | 0x440000 | +| 296 | blk.32.ffn_down.weight | 0x1603eb400 | 0x2904000 | +| 297 | 
blk.32.ffn_gate.weight | 0x162cef400 | 0x2904000 | +| 298 | blk.32.ffn_norm.weight | 0x1655f3400 | 0x4000 | +| 299 | blk.32.ffn_up.weight | 0x1655f7400 | 0x2904000 | +| 300 | blk.33.attn_k.weight | 0x167efb400 | 0x2c0000 | +| 301 | blk.33.attn_norm.weight | 0x1681bb400 | 0x4000 | +| 302 | blk.33.attn_output.weight | 0x1681bf400 | 0xd20000 | +| 303 | blk.33.attn_q.weight | 0x168edf400 | 0xb00000 | +| 304 | blk.33.attn_v.weight | 0x1699df400 | 0x440000 | +| 305 | blk.33.ffn_down.weight | 0x169e1f400 | 0x2260000 | +| 306 | blk.33.ffn_gate.weight | 0x16c07f400 | 0x2904000 | +| 307 | blk.33.ffn_norm.weight | 0x16e983400 | 0x4000 | +| 308 | blk.33.ffn_up.weight | 0x16e987400 | 0x2904000 | +| 309 | blk.34.attn_k.weight | 0x17128b400 | 0x2c0000 | +| 310 | blk.34.attn_norm.weight | 0x17154b400 | 0x4000 | +| 311 | blk.34.attn_output.weight | 0x17154f400 | 0xd20000 | +| 312 | blk.34.attn_q.weight | 0x17226f400 | 0xd20000 | +| 313 | blk.34.attn_v.weight | 0x172f8f400 | 0x440000 | +| 314 | blk.34.ffn_down.weight | 0x1733cf400 | 0x2904000 | +| 315 | blk.34.ffn_gate.weight | 0x175cd3400 | 0x2904000 | +| 316 | blk.34.ffn_norm.weight | 0x1785d7400 | 0x4000 | +| 317 | blk.34.ffn_up.weight | 0x1785db400 | 0x2904000 | +| 318 | blk.35.attn_k.weight | 0x17aedf400 | 0x2c0000 | +| 319 | blk.35.attn_norm.weight | 0x17b19f400 | 0x4000 | +| 320 | blk.35.attn_output.weight | 0x17b1a3400 | 0xd20000 | +| 321 | blk.35.attn_q.weight | 0x17bec3400 | 0xd20000 | +| 322 | blk.35.attn_v.weight | 0x17cbe3400 | 0x348000 | +| 323 | blk.35.ffn_down.weight | 0x17cf2b400 | 0x2260000 | +| 324 | blk.35.ffn_gate.weight | 0x17f18b400 | 0x2904000 | +| 325 | blk.35.ffn_norm.weight | 0x181a8f400 | 0x4000 | +| 326 | blk.35.ffn_up.weight | 0x181a93400 | 0x2904000 | +| 327 | blk.36.attn_k.weight | 0x184397400 | 0x2c0000 | +| 328 | blk.36.attn_norm.weight | 0x184657400 | 0x4000 | +| 329 | blk.36.attn_output.weight | 0x18465b400 | 0xd20000 | +| 330 | blk.36.attn_q.weight | 0x18537b400 | 0xd20000 | +| 331 | 
blk.36.attn_v.weight | 0x18609b400 | 0x348000 | +| 332 | blk.36.ffn_down.weight | 0x1863e3400 | 0x2260000 | +| 333 | blk.36.ffn_gate.weight | 0x188643400 | 0x2904000 | +| 334 | blk.36.ffn_norm.weight | 0x18af47400 | 0x4000 | +| 335 | blk.36.ffn_up.weight | 0x18af4b400 | 0x2904000 | +| 336 | blk.37.attn_k.weight | 0x18d84f400 | 0x2c0000 | +| 337 | blk.37.attn_norm.weight | 0x18db0f400 | 0x4000 | +| 338 | blk.37.attn_output.weight | 0x18db13400 | 0xd20000 | +| 339 | blk.37.attn_q.weight | 0x18e833400 | 0xb00000 | +| 340 | blk.37.attn_v.weight | 0x18f333400 | 0x348000 | +| 341 | blk.37.ffn_down.weight | 0x18f67b400 | 0x2904000 | +| 342 | blk.37.ffn_gate.weight | 0x191f7f400 | 0x2904000 | +| 343 | blk.37.ffn_norm.weight | 0x194883400 | 0x4000 | +| 344 | blk.37.ffn_up.weight | 0x194887400 | 0x2904000 | +| 345 | blk.38.attn_k.weight | 0x19718b400 | 0x2c0000 | +| 346 | blk.38.attn_norm.weight | 0x19744b400 | 0x4000 | +| 347 | blk.38.attn_output.weight | 0x19744f400 | 0xd20000 | +| 348 | blk.38.attn_q.weight | 0x19816f400 | 0xd20000 | +| 349 | blk.38.attn_v.weight | 0x198e8f400 | 0x348000 | +| 350 | blk.38.ffn_down.weight | 0x1991d7400 | 0x2904000 | +| 351 | blk.38.ffn_gate.weight | 0x19badb400 | 0x2904000 | +| 352 | blk.38.ffn_norm.weight | 0x19e3df400 | 0x4000 | +| 353 | blk.38.ffn_up.weight | 0x19e3e3400 | 0x2904000 | +| 354 | blk.39.attn_k.weight | 0x1a0ce7400 | 0x2c0000 | +| 355 | blk.39.attn_norm.weight | 0x1a0fa7400 | 0x4000 | +| 356 | blk.39.attn_output.weight | 0x1a0fab400 | 0xd20000 | +| 357 | blk.39.attn_q.weight | 0x1a1ccb400 | 0xb00000 | +| 358 | blk.39.attn_v.weight | 0x1a27cb400 | 0x348000 | +| 359 | blk.39.ffn_down.weight | 0x1a2b13400 | 0x2260000 | +| 360 | blk.39.ffn_gate.weight | 0x1a4d73400 | 0x2904000 | +| 361 | blk.39.ffn_norm.weight | 0x1a7677400 | 0x4000 | +| 362 | blk.39.ffn_up.weight | 0x1a767b400 | 0x2904000 | + +### Base Tensor Group : ~822M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW 
| +|-----:|:-------------------|:---------------------------------|:------------------|:----------------------|:-----|--------:| +| 0 | output.weight | Output (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q8_0 | 8.5000 | +| 1 | output_norm.weight | Output Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 2 | token_embd.weight | Token Embedding (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in base: (~822M) 822087680 +- Percentage of total elements: 9.35% +- Bits per Weight (BPW) for base: 7.5314 bits + + +### Block 0 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 3 | blk.0.attn_k.weight | Block 0 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q4_K | 4.5000 | +| 4 | blk.0.attn_norm.weight | Block 0 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 5 | blk.0.attn_output.weight | Block 0 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 6 | blk.0.attn_q.weight | Block 0 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q4_K | 4.5000 | +| 7 | blk.0.attn_v.weight | Block 0 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 8 | blk.0.ffn_down.weight | Block 0 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 9 | blk.0.ffn_gate.weight | Block 0 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 10 | blk.0.ffn_norm.weight | Block 0 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 11 | blk.0.ffn_up.weight | Block 0 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.0: (~199M) 
199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.0: 5.6978 bits + + +### Block 1 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 12 | blk.1.attn_k.weight | Block 1 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 13 | blk.1.attn_norm.weight | Block 1 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 14 | blk.1.attn_output.weight | Block 1 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 15 | blk.1.attn_q.weight | Block 1 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 16 | blk.1.attn_v.weight | Block 1 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 17 | blk.1.ffn_down.weight | Block 1 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 18 | blk.1.ffn_gate.weight | Block 1 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 19 | blk.1.ffn_norm.weight | Block 1 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 20 | blk.1.ffn_up.weight | Block 1 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.1: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.1: 6.4925 bits + + +### Block 2 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 21 | blk.2.attn_k.weight | Block 2 Attention Key (W) | ( ~4M) 4194304 | 4096 
x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 22 | blk.2.attn_norm.weight | Block 2 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 23 | blk.2.attn_output.weight | Block 2 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 24 | blk.2.attn_q.weight | Block 2 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 25 | blk.2.attn_v.weight | Block 2 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 26 | blk.2.ffn_down.weight | Block 2 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 27 | blk.2.ffn_gate.weight | Block 2 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 28 | blk.2.ffn_norm.weight | Block 2 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 29 | blk.2.ffn_up.weight | Block 2 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.2: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.2: 6.4925 bits + + +### Block 3 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 30 | blk.3.attn_k.weight | Block 3 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 31 | blk.3.attn_norm.weight | Block 3 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 32 | blk.3.attn_output.weight | Block 3 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 33 | blk.3.attn_q.weight | Block 3 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 34 | blk.3.attn_v.weight | Block 3 Attention Value (W) | ( ~4M) 4194304 | 
4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 35 | blk.3.ffn_down.weight | Block 3 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 36 | blk.3.ffn_gate.weight | Block 3 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 37 | blk.3.ffn_norm.weight | Block 3 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 38 | blk.3.ffn_up.weight | Block 3 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.3: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.3: 6.2129 bits + + +### Block 4 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 39 | blk.4.attn_k.weight | Block 4 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 40 | blk.4.attn_norm.weight | Block 4 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 41 | blk.4.attn_output.weight | Block 4 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 42 | blk.4.attn_q.weight | Block 4 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 43 | blk.4.attn_v.weight | Block 4 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 44 | blk.4.ffn_down.weight | Block 4 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 45 | blk.4.ffn_gate.weight | Block 4 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 46 | blk.4.ffn_norm.weight | Block 4 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 47 | blk.4.ffn_up.weight | Block 4 
Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.4: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.4: 6.3024 bits + + +### Block 5 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 48 | blk.5.attn_k.weight | Block 5 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 49 | blk.5.attn_norm.weight | Block 5 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 50 | blk.5.attn_output.weight | Block 5 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 51 | blk.5.attn_q.weight | Block 5 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 52 | blk.5.attn_v.weight | Block 5 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 53 | blk.5.ffn_down.weight | Block 5 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 54 | blk.5.ffn_gate.weight | Block 5 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 55 | blk.5.ffn_norm.weight | Block 5 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 56 | blk.5.ffn_up.weight | Block 5 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.5: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.5: 6.2129 bits + + +### Block 6 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 57 | blk.6.attn_k.weight | Block 6 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 58 | blk.6.attn_norm.weight | Block 6 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 59 | blk.6.attn_output.weight | Block 6 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 60 | blk.6.attn_q.weight | Block 6 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 61 | blk.6.attn_v.weight | Block 6 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 62 | blk.6.ffn_down.weight | Block 6 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 63 | blk.6.ffn_gate.weight | Block 6 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 64 | blk.6.ffn_norm.weight | Block 6 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 65 | blk.6.ffn_up.weight | Block 6 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.6: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.6: 6.2129 bits + + +### Block 7 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 66 | blk.7.attn_k.weight | Block 7 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 67 | blk.7.attn_norm.weight | Block 7 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 68 | blk.7.attn_output.weight | Block 7 Attention Output (W) | (~17M) 16777216 | 4096 x 
4096 x 1 x 1 | Q6_K | 6.5625 | +| 69 | blk.7.attn_q.weight | Block 7 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 70 | blk.7.attn_v.weight | Block 7 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 71 | blk.7.ffn_down.weight | Block 7 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 72 | blk.7.ffn_gate.weight | Block 7 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 73 | blk.7.ffn_norm.weight | Block 7 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 74 | blk.7.ffn_up.weight | Block 7 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.7: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.7: 6.6043 bits + + +### Block 8 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 75 | blk.8.attn_k.weight | Block 8 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 76 | blk.8.attn_norm.weight | Block 8 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 77 | blk.8.attn_output.weight | Block 8 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 78 | blk.8.attn_q.weight | Block 8 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 79 | blk.8.attn_v.weight | Block 8 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 80 | blk.8.ffn_down.weight | Block 8 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 81 | blk.8.ffn_gate.weight | Block 8 Feed-Forward Network "Gate" (W) | 
(~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 82 | blk.8.ffn_norm.weight | Block 8 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 83 | blk.8.ffn_up.weight | Block 8 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.8: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.8: 5.9333 bits + + +### Block 9 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 84 | blk.9.attn_k.weight | Block 9 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 85 | blk.9.attn_norm.weight | Block 9 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 86 | blk.9.attn_output.weight | Block 9 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 87 | blk.9.attn_q.weight | Block 9 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 88 | blk.9.attn_v.weight | Block 9 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 89 | blk.9.ffn_down.weight | Block 9 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 90 | blk.9.ffn_gate.weight | Block 9 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 91 | blk.9.ffn_norm.weight | Block 9 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 92 | blk.9.ffn_up.weight | Block 9 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.9: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.9: 5.9333 bits + + +### Block 10 
Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 93 | blk.10.attn_k.weight | Block 10 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 94 | blk.10.attn_norm.weight | Block 10 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 95 | blk.10.attn_output.weight | Block 10 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 96 | blk.10.attn_q.weight | Block 10 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 97 | blk.10.attn_v.weight | Block 10 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 98 | blk.10.ffn_down.weight | Block 10 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 99 | blk.10.ffn_gate.weight | Block 10 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 100 | blk.10.ffn_norm.weight | Block 10 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 101 | blk.10.ffn_up.weight | Block 10 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.10: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.10: 5.9333 bits + + +### Block 11 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 102 | blk.11.attn_k.weight | Block 11 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 103 | blk.11.attn_norm.weight | Block 11 
Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 104 | blk.11.attn_output.weight | Block 11 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 105 | blk.11.attn_q.weight | Block 11 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 106 | blk.11.attn_v.weight | Block 11 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 107 | blk.11.ffn_down.weight | Block 11 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 108 | blk.11.ffn_gate.weight | Block 11 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 109 | blk.11.ffn_norm.weight | Block 11 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 110 | blk.11.ffn_up.weight | Block 11 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.11: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.11: 6.2129 bits + + +### Block 12 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 111 | blk.12.attn_k.weight | Block 12 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 112 | blk.12.attn_norm.weight | Block 12 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 113 | blk.12.attn_output.weight | Block 12 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 114 | blk.12.attn_q.weight | Block 12 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 115 | blk.12.attn_v.weight | Block 12 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 
8.5000 | +| 116 | blk.12.ffn_down.weight | Block 12 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 117 | blk.12.ffn_gate.weight | Block 12 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 118 | blk.12.ffn_norm.weight | Block 12 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 119 | blk.12.ffn_up.weight | Block 12 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.12: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.12: 6.2129 bits + + +### Block 13 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 120 | blk.13.attn_k.weight | Block 13 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 121 | blk.13.attn_norm.weight | Block 13 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 122 | blk.13.attn_output.weight | Block 13 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 123 | blk.13.attn_q.weight | Block 13 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 124 | blk.13.attn_v.weight | Block 13 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 125 | blk.13.ffn_down.weight | Block 13 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 126 | blk.13.ffn_gate.weight | Block 13 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q5_K | 5.5000 | +| 127 | blk.13.ffn_norm.weight | Block 13 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 128 | 
blk.13.ffn_up.weight | Block 13 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.13: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.13: 6.2129 bits + + +### Block 14 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 129 | blk.14.attn_k.weight | Block 14 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 130 | blk.14.attn_norm.weight | Block 14 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 131 | blk.14.attn_output.weight | Block 14 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 132 | blk.14.attn_q.weight | Block 14 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 133 | blk.14.attn_v.weight | Block 14 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 134 | blk.14.ffn_down.weight | Block 14 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 135 | blk.14.ffn_gate.weight | Block 14 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 136 | blk.14.ffn_norm.weight | Block 14 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 137 | blk.14.ffn_up.weight | Block 14 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.14: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.14: 6.4925 bits + + +### Block 15 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 138 | blk.15.attn_k.weight | Block 15 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 139 | blk.15.attn_norm.weight | Block 15 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 140 | blk.15.attn_output.weight | Block 15 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 141 | blk.15.attn_q.weight | Block 15 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 142 | blk.15.attn_v.weight | Block 15 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 143 | blk.15.ffn_down.weight | Block 15 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 144 | blk.15.ffn_gate.weight | Block 15 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 145 | blk.15.ffn_norm.weight | Block 15 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 146 | blk.15.ffn_up.weight | Block 15 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.15: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.15: 6.5820 bits + + +### Block 16 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 147 | blk.16.attn_k.weight | Block 16 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 148 | blk.16.attn_norm.weight | Block 16 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 149 | blk.16.attn_output.weight | Block 16 
Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 150 | blk.16.attn_q.weight | Block 16 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 151 | blk.16.attn_v.weight | Block 16 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 152 | blk.16.ffn_down.weight | Block 16 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 153 | blk.16.ffn_gate.weight | Block 16 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 154 | blk.16.ffn_norm.weight | Block 16 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 155 | blk.16.ffn_up.weight | Block 16 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.16: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.16: 6.6043 bits + + +### Block 17 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 156 | blk.17.attn_k.weight | Block 17 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 157 | blk.17.attn_norm.weight | Block 17 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 158 | blk.17.attn_output.weight | Block 17 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 159 | blk.17.attn_q.weight | Block 17 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 160 | blk.17.attn_v.weight | Block 17 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 161 | blk.17.ffn_down.weight | Block 17 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | 
Q6_K | 6.5625 | +| 162 | blk.17.ffn_gate.weight | Block 17 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 163 | blk.17.ffn_norm.weight | Block 17 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 164 | blk.17.ffn_up.weight | Block 17 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.17: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.17: 6.5820 bits + + +### Block 18 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 165 | blk.18.attn_k.weight | Block 18 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 166 | blk.18.attn_norm.weight | Block 18 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 167 | blk.18.attn_output.weight | Block 18 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 168 | blk.18.attn_q.weight | Block 18 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 169 | blk.18.attn_v.weight | Block 18 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 170 | blk.18.ffn_down.weight | Block 18 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 171 | blk.18.ffn_gate.weight | Block 18 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 172 | blk.18.ffn_norm.weight | Block 18 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 173 | blk.18.ffn_up.weight | Block 18 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in 
blk.18: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.18: 6.4925 bits + + +### Block 19 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 174 | blk.19.attn_k.weight | Block 19 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 175 | blk.19.attn_norm.weight | Block 19 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 176 | blk.19.attn_output.weight | Block 19 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 177 | blk.19.attn_q.weight | Block 19 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 178 | blk.19.attn_v.weight | Block 19 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 179 | blk.19.ffn_down.weight | Block 19 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 180 | blk.19.ffn_gate.weight | Block 19 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 181 | blk.19.ffn_norm.weight | Block 19 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 182 | blk.19.ffn_up.weight | Block 19 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.19: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.19: 6.4925 bits + + +### Block 20 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 183 | 
blk.20.attn_k.weight | Block 20 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 184 | blk.20.attn_norm.weight | Block 20 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 185 | blk.20.attn_output.weight | Block 20 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 186 | blk.20.attn_q.weight | Block 20 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 187 | blk.20.attn_v.weight | Block 20 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 188 | blk.20.ffn_down.weight | Block 20 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 189 | blk.20.ffn_gate.weight | Block 20 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 190 | blk.20.ffn_norm.weight | Block 20 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 191 | blk.20.ffn_up.weight | Block 20 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.20: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.20: 6.5820 bits + + +### Block 21 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 192 | blk.21.attn_k.weight | Block 21 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 193 | blk.21.attn_norm.weight | Block 21 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 194 | blk.21.attn_output.weight | Block 21 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 195 | blk.21.attn_q.weight | Block 21 Attention Query (W) | (~17M) 16777216 | 
4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 196 | blk.21.attn_v.weight | Block 21 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 197 | blk.21.ffn_down.weight | Block 21 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 198 | blk.21.ffn_gate.weight | Block 21 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 199 | blk.21.ffn_norm.weight | Block 21 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 200 | blk.21.ffn_up.weight | Block 21 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.21: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.21: 6.4925 bits + + +### Block 22 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 201 | blk.22.attn_k.weight | Block 22 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 202 | blk.22.attn_norm.weight | Block 22 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 203 | blk.22.attn_output.weight | Block 22 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 204 | blk.22.attn_q.weight | Block 22 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 205 | blk.22.attn_v.weight | Block 22 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 206 | blk.22.ffn_down.weight | Block 22 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 207 | blk.22.ffn_gate.weight | Block 22 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 208 | 
blk.22.ffn_norm.weight | Block 22 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 209 | blk.22.ffn_up.weight | Block 22 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.22: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.22: 6.5820 bits + + +### Block 23 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 210 | blk.23.attn_k.weight | Block 23 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 211 | blk.23.attn_norm.weight | Block 23 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 212 | blk.23.attn_output.weight | Block 23 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 213 | blk.23.attn_q.weight | Block 23 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 214 | blk.23.attn_v.weight | Block 23 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 215 | blk.23.ffn_down.weight | Block 23 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 216 | blk.23.ffn_gate.weight | Block 23 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 217 | blk.23.ffn_norm.weight | Block 23 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 218 | blk.23.ffn_up.weight | Block 23 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.23: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.23: 6.5820 bits + + +### Block 24 Tensor Group : ~199M 
Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 219 | blk.24.attn_k.weight | Block 24 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 220 | blk.24.attn_norm.weight | Block 24 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 221 | blk.24.attn_output.weight | Block 24 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 222 | blk.24.attn_q.weight | Block 24 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 223 | blk.24.attn_v.weight | Block 24 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 224 | blk.24.ffn_down.weight | Block 24 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 225 | blk.24.ffn_gate.weight | Block 24 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 226 | blk.24.ffn_norm.weight | Block 24 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 227 | blk.24.ffn_up.weight | Block 24 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.24: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.24: 6.4925 bits + + +### Block 25 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 228 | blk.25.attn_k.weight | Block 25 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 229 | blk.25.attn_norm.weight | Block 25 Attention 
Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 230 | blk.25.attn_output.weight | Block 25 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 231 | blk.25.attn_q.weight | Block 25 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 232 | blk.25.attn_v.weight | Block 25 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 233 | blk.25.ffn_down.weight | Block 25 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 234 | blk.25.ffn_gate.weight | Block 25 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 235 | blk.25.ffn_norm.weight | Block 25 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 236 | blk.25.ffn_up.weight | Block 25 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.25: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.25: 6.5820 bits + + +### Block 26 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 237 | blk.26.attn_k.weight | Block 26 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 238 | blk.26.attn_norm.weight | Block 26 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 239 | blk.26.attn_output.weight | Block 26 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 240 | blk.26.attn_q.weight | Block 26 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 241 | blk.26.attn_v.weight | Block 26 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 
242 | blk.26.ffn_down.weight | Block 26 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 243 | blk.26.ffn_gate.weight | Block 26 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 244 | blk.26.ffn_norm.weight | Block 26 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 245 | blk.26.ffn_up.weight | Block 26 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.26: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.26: 6.4925 bits + + +### Block 27 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 246 | blk.27.attn_k.weight | Block 27 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 247 | blk.27.attn_norm.weight | Block 27 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 248 | blk.27.attn_output.weight | Block 27 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 249 | blk.27.attn_q.weight | Block 27 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 250 | blk.27.attn_v.weight | Block 27 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 251 | blk.27.ffn_down.weight | Block 27 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 252 | blk.27.ffn_gate.weight | Block 27 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 253 | blk.27.ffn_norm.weight | Block 27 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 254 | blk.27.ffn_up.weight | Block 
27 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.27: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.27: 6.5820 bits + + +### Block 28 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 255 | blk.28.attn_k.weight | Block 28 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 256 | blk.28.attn_norm.weight | Block 28 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 257 | blk.28.attn_output.weight | Block 28 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 258 | blk.28.attn_q.weight | Block 28 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 259 | blk.28.attn_v.weight | Block 28 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 260 | blk.28.ffn_down.weight | Block 28 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 261 | blk.28.ffn_gate.weight | Block 28 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 262 | blk.28.ffn_norm.weight | Block 28 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 263 | blk.28.ffn_up.weight | Block 28 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.28: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.28: 6.6043 bits + + +### Block 29 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 264 | blk.29.attn_k.weight | Block 29 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 265 | blk.29.attn_norm.weight | Block 29 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 266 | blk.29.attn_output.weight | Block 29 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 267 | blk.29.attn_q.weight | Block 29 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 268 | blk.29.attn_v.weight | Block 29 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 269 | blk.29.ffn_down.weight | Block 29 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 270 | blk.29.ffn_gate.weight | Block 29 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 271 | blk.29.ffn_norm.weight | Block 29 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 272 | blk.29.ffn_up.weight | Block 29 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.29: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.29: 6.5820 bits + + +### Block 30 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 273 | blk.30.attn_k.weight | Block 30 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 274 | blk.30.attn_norm.weight | Block 30 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 275 | blk.30.attn_output.weight | Block 30 
Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 276 | blk.30.attn_q.weight | Block 30 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 277 | blk.30.attn_v.weight | Block 30 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 278 | blk.30.ffn_down.weight | Block 30 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 279 | blk.30.ffn_gate.weight | Block 30 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 280 | blk.30.ffn_norm.weight | Block 30 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 281 | blk.30.ffn_up.weight | Block 30 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.30: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.30: 6.6043 bits + + +### Block 31 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 282 | blk.31.attn_k.weight | Block 31 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 283 | blk.31.attn_norm.weight | Block 31 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 284 | blk.31.attn_output.weight | Block 31 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 285 | blk.31.attn_q.weight | Block 31 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 286 | blk.31.attn_v.weight | Block 31 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 287 | blk.31.ffn_down.weight | Block 31 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | 
Q6_K | 6.5625 | +| 288 | blk.31.ffn_gate.weight | Block 31 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 289 | blk.31.ffn_norm.weight | Block 31 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 290 | blk.31.ffn_up.weight | Block 31 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.31: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.31: 6.5820 bits + + +### Block 32 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 291 | blk.32.attn_k.weight | Block 32 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 292 | blk.32.attn_norm.weight | Block 32 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 293 | blk.32.attn_output.weight | Block 32 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 294 | blk.32.attn_q.weight | Block 32 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 295 | blk.32.attn_v.weight | Block 32 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 296 | blk.32.ffn_down.weight | Block 32 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 297 | blk.32.ffn_gate.weight | Block 32 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 298 | blk.32.ffn_norm.weight | Block 32 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 299 | blk.32.ffn_up.weight | Block 32 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in 
blk.32: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.32: 6.5820 bits + + +### Block 33 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 300 | blk.33.attn_k.weight | Block 33 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 301 | blk.33.attn_norm.weight | Block 33 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 302 | blk.33.attn_output.weight | Block 33 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 303 | blk.33.attn_q.weight | Block 33 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 304 | blk.33.attn_v.weight | Block 33 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 305 | blk.33.ffn_down.weight | Block 33 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 306 | blk.33.ffn_gate.weight | Block 33 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 307 | blk.33.ffn_norm.weight | Block 33 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 308 | blk.33.ffn_up.weight | Block 33 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.33: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.33: 6.2129 bits + + +### Block 34 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 309 | 
blk.34.attn_k.weight | Block 34 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 310 | blk.34.attn_norm.weight | Block 34 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 311 | blk.34.attn_output.weight | Block 34 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 312 | blk.34.attn_q.weight | Block 34 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 313 | blk.34.attn_v.weight | Block 34 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 314 | blk.34.ffn_down.weight | Block 34 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 315 | blk.34.ffn_gate.weight | Block 34 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 316 | blk.34.ffn_norm.weight | Block 34 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 317 | blk.34.ffn_up.weight | Block 34 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.34: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.34: 6.5820 bits + + +### Block 35 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 318 | blk.35.attn_k.weight | Block 35 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 319 | blk.35.attn_norm.weight | Block 35 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 320 | blk.35.attn_output.weight | Block 35 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 321 | blk.35.attn_q.weight | Block 35 Attention Query (W) | (~17M) 16777216 | 
4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 322 | blk.35.attn_v.weight | Block 35 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 323 | blk.35.ffn_down.weight | Block 35 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 324 | blk.35.ffn_gate.weight | Block 35 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 325 | blk.35.ffn_norm.weight | Block 35 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 326 | blk.35.ffn_up.weight | Block 35 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.35: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.35: 6.2616 bits + + +### Block 36 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 327 | blk.36.attn_k.weight | Block 36 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 328 | blk.36.attn_norm.weight | Block 36 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 329 | blk.36.attn_output.weight | Block 36 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 330 | blk.36.attn_q.weight | Block 36 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 331 | blk.36.attn_v.weight | Block 36 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 332 | blk.36.ffn_down.weight | Block 36 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 333 | blk.36.ffn_gate.weight | Block 36 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 334 | 
blk.36.ffn_norm.weight | Block 36 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 335 | blk.36.ffn_up.weight | Block 36 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.36: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.36: 6.2616 bits + + +### Block 37 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 336 | blk.37.attn_k.weight | Block 37 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 337 | blk.37.attn_norm.weight | Block 37 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 338 | blk.37.attn_output.weight | Block 37 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 339 | blk.37.attn_q.weight | Block 37 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 340 | blk.37.attn_v.weight | Block 37 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 341 | blk.37.ffn_down.weight | Block 37 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 342 | blk.37.ffn_gate.weight | Block 37 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 343 | blk.37.ffn_norm.weight | Block 37 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 344 | blk.37.ffn_up.weight | Block 37 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.37: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.37: 6.4517 bits + + +### Block 38 Tensor Group : ~199M 
Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 345 | blk.38.attn_k.weight | Block 38 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 346 | blk.38.attn_norm.weight | Block 38 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 347 | blk.38.attn_output.weight | Block 38 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 348 | blk.38.attn_q.weight | Block 38 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 349 | blk.38.attn_v.weight | Block 38 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 350 | blk.38.ffn_down.weight | Block 38 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 351 | blk.38.ffn_gate.weight | Block 38 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 352 | blk.38.ffn_norm.weight | Block 38 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 353 | blk.38.ffn_up.weight | Block 38 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.38: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.38: 6.5412 bits + + +### Block 39 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 354 | blk.39.attn_k.weight | Block 39 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q5_K | 5.5000 | +| 355 | blk.39.attn_norm.weight | Block 39 Attention 
Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 356 | blk.39.attn_output.weight | Block 39 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 357 | blk.39.attn_q.weight | Block 39 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 358 | blk.39.attn_v.weight | Block 39 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 359 | blk.39.ffn_down.weight | Block 39 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_K | 5.5000 | +| 360 | blk.39.ffn_gate.weight | Block 39 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 361 | blk.39.ffn_norm.weight | Block 39 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 362 | blk.39.ffn_up.weight | Block 39 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | + +- Total elements in blk.39: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.39: 6.1721 bits + + +Total BPW for granite-4.1-8b-Q6_K.gguf: 6.4998 bits diff --git a/scores/granite-4.1-8b-Q7_K.md b/scores/granite-4.1-8b-Q7_K.md new file mode 100644 index 0000000..cf3fb86 --- /dev/null +++ b/scores/granite-4.1-8b-Q7_K.md @@ -0,0 +1,1247 @@ +# granite-4.1-8b-WIP/granite-4.1-8b-Q7_K.gguf - GGUF Internal File Dump + +- Endian: LITTLE endian + +## Key Value Metadata Store + +There are 42 key-value pairs in this file + +| POS | TYPE | Count | Key | Value | +|----:|:---------|-------:|:-----------------------------------------|:--------------------------------------------------------------------| +| 1 | UINT32 | 1 | GGUF.version | 3 | +| 2 | UINT64 | 1 | GGUF.tensor_count | 363 | +| 3 | UINT64 | 1 | GGUF.kv_count | 39 | +| 4 | STRING | 1 | general.architecture | `granite` | +| 5 | STRING | 1 | general.type | `model` | +| 6 | STRING | 1 | general.name | `Granite 4.1 8b` | +| 7 | STRING | 1 
| general.basename | `granite-4.1` | +| 8 | STRING | 1 | general.size_label | `8B` | +| 9 | STRING | 1 | general.license | `apache-2.0` | +| 10 | [STRING] | 2 | general.tags | [ `language`, `granite-4.1` ] | +| 11 | UINT32 | 1 | granite.block_count | 40 | +| 12 | UINT32 | 1 | granite.context_length | 131072 | +| 13 | UINT32 | 1 | granite.embedding_length | 4096 | +| 14 | UINT32 | 1 | granite.feed_forward_length | 12800 | +| 15 | UINT32 | 1 | granite.attention.head_count | 32 | +| 16 | UINT32 | 1 | granite.attention.head_count_kv | 8 | +| 17 | FLOAT32 | 1 | granite.rope.freq_base | 10000000.0 | +| 18 | FLOAT32 | 1 | granite.attention.layer_norm_rms_epsilon | 1e-05 | +| 19 | UINT32 | 1 | granite.vocab_size | 100352 | +| 20 | UINT32 | 1 | granite.rope.dimension_count | 128 | +| 21 | FLOAT32 | 1 | granite.attention.scale | 0.0078125 | +| 22 | FLOAT32 | 1 | granite.embedding_scale | 12.0 | +| 23 | FLOAT32 | 1 | granite.residual_scale | 0.22 | +| 24 | FLOAT32 | 1 | granite.logit_scale | 16.0 | +| 25 | STRING | 1 | tokenizer.ggml.model | `gpt2` | +| 26 | STRING | 1 | tokenizer.ggml.pre | `granite-docling` | +| 27 | [STRING] | 100352 | tokenizer.ggml.tokens | [ `!`, `"`, `#`, `$`, `%`, ... ] | +| 28 | [INT32] | 100352 | tokenizer.ggml.token_type | [ 1, 1, 1, 1, 1, 1, 1, ... ] | +| 29 | [STRING] | 100000 | tokenizer.ggml.merges | [ `Ġ Ġ`, `ĠĠ ĠĠ`, `i n`, `Ġ t`, `ĠĠĠĠ ĠĠĠĠ`, ... 
] | +| 30 | UINT32 | 1 | tokenizer.ggml.bos_token_id | 100257 | +| 31 | UINT32 | 1 | tokenizer.ggml.eos_token_id | 100257 | +| 32 | UINT32 | 1 | tokenizer.ggml.unknown_token_id | 100269 | +| 33 | UINT32 | 1 | tokenizer.ggml.padding_token_id | 100256 | +| 34 | BOOL | 1 | tokenizer.ggml.add_bos_token | False | +| 35 | STRING | 1 | tokenizer.chat_template | `{%- set tools_system_message_p`...`end_of_role|>' }}{%- endif %}` | +| 36 | BOOL | 1 | tokenizer.ggml.add_space_prefix | False | +| 37 | UINT32 | 1 | general.quantization_version | 2 | +| 38 | UINT32 | 1 | general.file_type | 7 | +| 39 | STRING | 1 | quantize.imatrix.file | `imatrix/imatrix-granite-4.1-8b-medium.gguf` | +| 40 | STRING | 1 | quantize.imatrix.dataset | `../../datasets/imatrix/combined_all_medium.txt` | +| 41 | UINT32 | 1 | quantize.imatrix.entries_count | 280 | +| 42 | UINT32 | 1 | quantize.imatrix.chunks_count | 3601 | + +## Tensors Overview ~9B Elements + +Total number of elements in all tensors: 8791592960 Elements + +- [granite-4.1-8b-WIP/granite-4.1-8b-Q7\_K.gguf - GGUF Internal File Dump](#granite-41-8b-wipgranite-41-8b-q7_kgguf---gguf-internal-file-dump) + - [Key Value Metadata Store](#key-value-metadata-store) + - [Tensors Overview ~9B Elements](#tensors-overview-9b-elements) + - [Tensor Data Offset](#tensor-data-offset) + - [Base Tensor Group : ~822M Elements](#base-tensor-group--822m-elements) + - [Block 0 Tensor Group : ~199M Elements](#block-0-tensor-group--199m-elements) + - [Block 1 Tensor Group : ~199M Elements](#block-1-tensor-group--199m-elements) + - [Block 2 Tensor Group : ~199M Elements](#block-2-tensor-group--199m-elements) + - [Block 3 Tensor Group : ~199M Elements](#block-3-tensor-group--199m-elements) + - [Block 4 Tensor Group : ~199M Elements](#block-4-tensor-group--199m-elements) + - [Block 5 Tensor Group : ~199M Elements](#block-5-tensor-group--199m-elements) + - [Block 6 Tensor Group : ~199M Elements](#block-6-tensor-group--199m-elements) + - [Block 7 Tensor Group : ~199M 
Elements](#block-7-tensor-group--199m-elements) + - [Block 8 Tensor Group : ~199M Elements](#block-8-tensor-group--199m-elements) + - [Block 9 Tensor Group : ~199M Elements](#block-9-tensor-group--199m-elements) + - [Block 10 Tensor Group : ~199M Elements](#block-10-tensor-group--199m-elements) + - [Block 11 Tensor Group : ~199M Elements](#block-11-tensor-group--199m-elements) + - [Block 12 Tensor Group : ~199M Elements](#block-12-tensor-group--199m-elements) + - [Block 13 Tensor Group : ~199M Elements](#block-13-tensor-group--199m-elements) + - [Block 14 Tensor Group : ~199M Elements](#block-14-tensor-group--199m-elements) + - [Block 15 Tensor Group : ~199M Elements](#block-15-tensor-group--199m-elements) + - [Block 16 Tensor Group : ~199M Elements](#block-16-tensor-group--199m-elements) + - [Block 17 Tensor Group : ~199M Elements](#block-17-tensor-group--199m-elements) + - [Block 18 Tensor Group : ~199M Elements](#block-18-tensor-group--199m-elements) + - [Block 19 Tensor Group : ~199M Elements](#block-19-tensor-group--199m-elements) + - [Block 20 Tensor Group : ~199M Elements](#block-20-tensor-group--199m-elements) + - [Block 21 Tensor Group : ~199M Elements](#block-21-tensor-group--199m-elements) + - [Block 22 Tensor Group : ~199M Elements](#block-22-tensor-group--199m-elements) + - [Block 23 Tensor Group : ~199M Elements](#block-23-tensor-group--199m-elements) + - [Block 24 Tensor Group : ~199M Elements](#block-24-tensor-group--199m-elements) + - [Block 25 Tensor Group : ~199M Elements](#block-25-tensor-group--199m-elements) + - [Block 26 Tensor Group : ~199M Elements](#block-26-tensor-group--199m-elements) + - [Block 27 Tensor Group : ~199M Elements](#block-27-tensor-group--199m-elements) + - [Block 28 Tensor Group : ~199M Elements](#block-28-tensor-group--199m-elements) + - [Block 29 Tensor Group : ~199M Elements](#block-29-tensor-group--199m-elements) + - [Block 30 Tensor Group : ~199M Elements](#block-30-tensor-group--199m-elements) + - [Block 31 Tensor 
Group : ~199M Elements](#block-31-tensor-group--199m-elements) + - [Block 32 Tensor Group : ~199M Elements](#block-32-tensor-group--199m-elements) + - [Block 33 Tensor Group : ~199M Elements](#block-33-tensor-group--199m-elements) + - [Block 34 Tensor Group : ~199M Elements](#block-34-tensor-group--199m-elements) + - [Block 35 Tensor Group : ~199M Elements](#block-35-tensor-group--199m-elements) + - [Block 36 Tensor Group : ~199M Elements](#block-36-tensor-group--199m-elements) + - [Block 37 Tensor Group : ~199M Elements](#block-37-tensor-group--199m-elements) + - [Block 38 Tensor Group : ~199M Elements](#block-38-tensor-group--199m-elements) + - [Block 39 Tensor Group : ~199M Elements](#block-39-tensor-group--199m-elements) + +### Tensor Data Offset + +This table contains the offset and data segment relative to start of file + +| T_ID | Tensor Layer Name | Data Offset (B) | Data Size (B) | +|-----:|:--------------------------|-----------------:|-----------------:| +| 0 | output.weight | 0x367400 | 0x1a080000 | +| 1 | output_norm.weight | 0x1a3e7400 | 0x4000 | +| 2 | token_embd.weight | 0x1a3eb400 | 0x1a080000 | +| 3 | blk.0.attn_k.weight | 0x3446b400 | 0x348000 | +| 4 | blk.0.attn_norm.weight | 0x347b3400 | 0x4000 | +| 5 | blk.0.attn_output.weight | 0x347b7400 | 0xd20000 | +| 6 | blk.0.attn_q.weight | 0x354d7400 | 0xc00000 | +| 7 | blk.0.attn_v.weight | 0x360d7400 | 0x440000 | +| 8 | blk.0.ffn_down.weight | 0x36517400 | 0x2580000 | +| 9 | blk.0.ffn_gate.weight | 0x38a97400 | 0x2904000 | +| 10 | blk.0.ffn_norm.weight | 0x3b39b400 | 0x4000 | +| 11 | blk.0.ffn_up.weight | 0x3b39f400 | 0x3520000 | +| 12 | blk.1.attn_k.weight | 0x3e8bf400 | 0x348000 | +| 13 | blk.1.attn_norm.weight | 0x3ec07400 | 0x4000 | +| 14 | blk.1.attn_output.weight | 0x3ec0b400 | 0x1100000 | +| 15 | blk.1.attn_q.weight | 0x3fd0b400 | 0xd20000 | +| 16 | blk.1.attn_v.weight | 0x40a2b400 | 0x440000 | +| 17 | blk.1.ffn_down.weight | 0x40e6b400 | 0x2904000 | +| 18 | blk.1.ffn_gate.weight | 0x4376f400 
| 0x2904000 | +| 19 | blk.1.ffn_norm.weight | 0x46073400 | 0x4000 | +| 20 | blk.1.ffn_up.weight | 0x46077400 | 0x3520000 | +| 21 | blk.2.attn_k.weight | 0x49597400 | 0x348000 | +| 22 | blk.2.attn_norm.weight | 0x498df400 | 0x4000 | +| 23 | blk.2.attn_output.weight | 0x498e3400 | 0x1100000 | +| 24 | blk.2.attn_q.weight | 0x4a9e3400 | 0xd20000 | +| 25 | blk.2.attn_v.weight | 0x4b703400 | 0x440000 | +| 26 | blk.2.ffn_down.weight | 0x4bb43400 | 0x2904000 | +| 27 | blk.2.ffn_gate.weight | 0x4e447400 | 0x2904000 | +| 28 | blk.2.ffn_norm.weight | 0x50d4b400 | 0x4000 | +| 29 | blk.2.ffn_up.weight | 0x50d4f400 | 0x3520000 | +| 30 | blk.3.attn_k.weight | 0x5426f400 | 0x348000 | +| 31 | blk.3.attn_norm.weight | 0x545b7400 | 0x4000 | +| 32 | blk.3.attn_output.weight | 0x545bb400 | 0x1100000 | +| 33 | blk.3.attn_q.weight | 0x556bb400 | 0xd20000 | +| 34 | blk.3.attn_v.weight | 0x563db400 | 0x440000 | +| 35 | blk.3.ffn_down.weight | 0x5681b400 | 0x2580000 | +| 36 | blk.3.ffn_gate.weight | 0x58d9b400 | 0x3520000 | +| 37 | blk.3.ffn_norm.weight | 0x5c2bb400 | 0x4000 | +| 38 | blk.3.ffn_up.weight | 0x5c2bf400 | 0x3520000 | +| 39 | blk.4.attn_k.weight | 0x5f7df400 | 0x348000 | +| 40 | blk.4.attn_norm.weight | 0x5fb27400 | 0x4000 | +| 41 | blk.4.attn_output.weight | 0x5fb2b400 | 0x1100000 | +| 42 | blk.4.attn_q.weight | 0x60c2b400 | 0x1100000 | +| 43 | blk.4.attn_v.weight | 0x61d2b400 | 0x440000 | +| 44 | blk.4.ffn_down.weight | 0x6216b400 | 0x2904000 | +| 45 | blk.4.ffn_gate.weight | 0x64a6f400 | 0x2904000 | +| 46 | blk.4.ffn_norm.weight | 0x67373400 | 0x4000 | +| 47 | blk.4.ffn_up.weight | 0x67377400 | 0x3520000 | +| 48 | blk.5.attn_k.weight | 0x6a897400 | 0x348000 | +| 49 | blk.5.attn_norm.weight | 0x6abdf400 | 0x4000 | +| 50 | blk.5.attn_output.weight | 0x6abe3400 | 0x1100000 | +| 51 | blk.5.attn_q.weight | 0x6bce3400 | 0x1100000 | +| 52 | blk.5.attn_v.weight | 0x6cde3400 | 0x440000 | +| 53 | blk.5.ffn_down.weight | 0x6d223400 | 0x2580000 | +| 54 | blk.5.ffn_gate.weight | 
0x6f7a3400 | 0x3520000 | +| 55 | blk.5.ffn_norm.weight | 0x72cc3400 | 0x4000 | +| 56 | blk.5.ffn_up.weight | 0x72cc7400 | 0x3520000 | +| 57 | blk.6.attn_k.weight | 0x761e7400 | 0x440000 | +| 58 | blk.6.attn_norm.weight | 0x76627400 | 0x4000 | +| 59 | blk.6.attn_output.weight | 0x7662b400 | 0x1100000 | +| 60 | blk.6.attn_q.weight | 0x7772b400 | 0x1100000 | +| 61 | blk.6.attn_v.weight | 0x7882b400 | 0x440000 | +| 62 | blk.6.ffn_down.weight | 0x78c6b400 | 0x2904000 | +| 63 | blk.6.ffn_gate.weight | 0x7b56f400 | 0x2904000 | +| 64 | blk.6.ffn_norm.weight | 0x7de73400 | 0x4000 | +| 65 | blk.6.ffn_up.weight | 0x7de77400 | 0x3520000 | +| 66 | blk.7.attn_k.weight | 0x81397400 | 0x440000 | +| 67 | blk.7.attn_norm.weight | 0x817d7400 | 0x4000 | +| 68 | blk.7.attn_output.weight | 0x817db400 | 0x1100000 | +| 69 | blk.7.attn_q.weight | 0x828db400 | 0x1100000 | +| 70 | blk.7.attn_v.weight | 0x839db400 | 0x440000 | +| 71 | blk.7.ffn_down.weight | 0x83e1b400 | 0x2904000 | +| 72 | blk.7.ffn_gate.weight | 0x8671f400 | 0x2904000 | +| 73 | blk.7.ffn_norm.weight | 0x89023400 | 0x4000 | +| 74 | blk.7.ffn_up.weight | 0x89027400 | 0x3520000 | +| 75 | blk.8.attn_k.weight | 0x8c547400 | 0x348000 | +| 76 | blk.8.attn_norm.weight | 0x8c88f400 | 0x4000 | +| 77 | blk.8.attn_output.weight | 0x8c893400 | 0x1100000 | +| 78 | blk.8.attn_q.weight | 0x8d993400 | 0xd20000 | +| 79 | blk.8.attn_v.weight | 0x8e6b3400 | 0x440000 | +| 80 | blk.8.ffn_down.weight | 0x8eaf3400 | 0x2904000 | +| 81 | blk.8.ffn_gate.weight | 0x913f7400 | 0x2904000 | +| 82 | blk.8.ffn_norm.weight | 0x93cfb400 | 0x4000 | +| 83 | blk.8.ffn_up.weight | 0x93cff400 | 0x3520000 | +| 84 | blk.9.attn_k.weight | 0x9721f400 | 0x348000 | +| 85 | blk.9.attn_norm.weight | 0x97567400 | 0x4000 | +| 86 | blk.9.attn_output.weight | 0x9756b400 | 0x1100000 | +| 87 | blk.9.attn_q.weight | 0x9866b400 | 0xd20000 | +| 88 | blk.9.attn_v.weight | 0x9938b400 | 0x440000 | +| 89 | blk.9.ffn_down.weight | 0x997cb400 | 0x2904000 | +| 90 | blk.9.ffn_gate.weight 
| 0x9c0cf400 | 0x2904000 | +| 91 | blk.9.ffn_norm.weight | 0x9e9d3400 | 0x4000 | +| 92 | blk.9.ffn_up.weight | 0x9e9d7400 | 0x3520000 | +| 93 | blk.10.attn_k.weight | 0xa1ef7400 | 0x348000 | +| 94 | blk.10.attn_norm.weight | 0xa223f400 | 0x4000 | +| 95 | blk.10.attn_output.weight | 0xa2243400 | 0x1100000 | +| 96 | blk.10.attn_q.weight | 0xa3343400 | 0xd20000 | +| 97 | blk.10.attn_v.weight | 0xa4063400 | 0x440000 | +| 98 | blk.10.ffn_down.weight | 0xa44a3400 | 0x2904000 | +| 99 | blk.10.ffn_gate.weight | 0xa6da7400 | 0x2904000 | +| 100 | blk.10.ffn_norm.weight | 0xa96ab400 | 0x4000 | +| 101 | blk.10.ffn_up.weight | 0xa96af400 | 0x3520000 | +| 102 | blk.11.attn_k.weight | 0xacbcf400 | 0x348000 | +| 103 | blk.11.attn_norm.weight | 0xacf17400 | 0x4000 | +| 104 | blk.11.attn_output.weight | 0xacf1b400 | 0x1100000 | +| 105 | blk.11.attn_q.weight | 0xae01b400 | 0xd20000 | +| 106 | blk.11.attn_v.weight | 0xaed3b400 | 0x440000 | +| 107 | blk.11.ffn_down.weight | 0xaf17b400 | 0x2904000 | +| 108 | blk.11.ffn_gate.weight | 0xb1a7f400 | 0x2904000 | +| 109 | blk.11.ffn_norm.weight | 0xb4383400 | 0x4000 | +| 110 | blk.11.ffn_up.weight | 0xb4387400 | 0x3520000 | +| 111 | blk.12.attn_k.weight | 0xb78a7400 | 0x348000 | +| 112 | blk.12.attn_norm.weight | 0xb7bef400 | 0x4000 | +| 113 | blk.12.attn_output.weight | 0xb7bf3400 | 0x1100000 | +| 114 | blk.12.attn_q.weight | 0xb8cf3400 | 0xd20000 | +| 115 | blk.12.attn_v.weight | 0xb9a13400 | 0x440000 | +| 116 | blk.12.ffn_down.weight | 0xb9e53400 | 0x2904000 | +| 117 | blk.12.ffn_gate.weight | 0xbc757400 | 0x2904000 | +| 118 | blk.12.ffn_norm.weight | 0xbf05b400 | 0x4000 | +| 119 | blk.12.ffn_up.weight | 0xbf05f400 | 0x3520000 | +| 120 | blk.13.attn_k.weight | 0xc257f400 | 0x348000 | +| 121 | blk.13.attn_norm.weight | 0xc28c7400 | 0x4000 | +| 122 | blk.13.attn_output.weight | 0xc28cb400 | 0x1100000 | +| 123 | blk.13.attn_q.weight | 0xc39cb400 | 0xd20000 | +| 124 | blk.13.attn_v.weight | 0xc46eb400 | 0x440000 | +| 125 | 
blk.13.ffn_down.weight | 0xc4b2b400 | 0x2904000 | +| 126 | blk.13.ffn_gate.weight | 0xc742f400 | 0x2904000 | +| 127 | blk.13.ffn_norm.weight | 0xc9d33400 | 0x4000 | +| 128 | blk.13.ffn_up.weight | 0xc9d37400 | 0x3520000 | +| 129 | blk.14.attn_k.weight | 0xcd257400 | 0x348000 | +| 130 | blk.14.attn_norm.weight | 0xcd59f400 | 0x4000 | +| 131 | blk.14.attn_output.weight | 0xcd5a3400 | 0x1100000 | +| 132 | blk.14.attn_q.weight | 0xce6a3400 | 0xd20000 | +| 133 | blk.14.attn_v.weight | 0xcf3c3400 | 0x440000 | +| 134 | blk.14.ffn_down.weight | 0xcf803400 | 0x2904000 | +| 135 | blk.14.ffn_gate.weight | 0xd2107400 | 0x2904000 | +| 136 | blk.14.ffn_norm.weight | 0xd4a0b400 | 0x4000 | +| 137 | blk.14.ffn_up.weight | 0xd4a0f400 | 0x3520000 | +| 138 | blk.15.attn_k.weight | 0xd7f2f400 | 0x440000 | +| 139 | blk.15.attn_norm.weight | 0xd836f400 | 0x4000 | +| 140 | blk.15.attn_output.weight | 0xd8373400 | 0x1100000 | +| 141 | blk.15.attn_q.weight | 0xd9473400 | 0x1100000 | +| 142 | blk.15.attn_v.weight | 0xda573400 | 0x440000 | +| 143 | blk.15.ffn_down.weight | 0xda9b3400 | 0x2904000 | +| 144 | blk.15.ffn_gate.weight | 0xdd2b7400 | 0x2904000 | +| 145 | blk.15.ffn_norm.weight | 0xdfbbb400 | 0x4000 | +| 146 | blk.15.ffn_up.weight | 0xdfbbf400 | 0x3520000 | +| 147 | blk.16.attn_k.weight | 0xe30df400 | 0x440000 | +| 148 | blk.16.attn_norm.weight | 0xe351f400 | 0x4000 | +| 149 | blk.16.attn_output.weight | 0xe3523400 | 0x1100000 | +| 150 | blk.16.attn_q.weight | 0xe4623400 | 0x1100000 | +| 151 | blk.16.attn_v.weight | 0xe5723400 | 0x440000 | +| 152 | blk.16.ffn_down.weight | 0xe5b63400 | 0x2904000 | +| 153 | blk.16.ffn_gate.weight | 0xe8467400 | 0x2904000 | +| 154 | blk.16.ffn_norm.weight | 0xead6b400 | 0x4000 | +| 155 | blk.16.ffn_up.weight | 0xead6f400 | 0x3520000 | +| 156 | blk.17.attn_k.weight | 0xee28f400 | 0x348000 | +| 157 | blk.17.attn_norm.weight | 0xee5d7400 | 0x4000 | +| 158 | blk.17.attn_output.weight | 0xee5db400 | 0x1100000 | +| 159 | blk.17.attn_q.weight | 0xef6db400 | 
0xd20000 | +| 160 | blk.17.attn_v.weight | 0xf03fb400 | 0x440000 | +| 161 | blk.17.ffn_down.weight | 0xf083b400 | 0x2904000 | +| 162 | blk.17.ffn_gate.weight | 0xf313f400 | 0x2904000 | +| 163 | blk.17.ffn_norm.weight | 0xf5a43400 | 0x4000 | +| 164 | blk.17.ffn_up.weight | 0xf5a47400 | 0x3520000 | +| 165 | blk.18.attn_k.weight | 0xf8f67400 | 0x348000 | +| 166 | blk.18.attn_norm.weight | 0xf92af400 | 0x4000 | +| 167 | blk.18.attn_output.weight | 0xf92b3400 | 0x1100000 | +| 168 | blk.18.attn_q.weight | 0xfa3b3400 | 0xd20000 | +| 169 | blk.18.attn_v.weight | 0xfb0d3400 | 0x440000 | +| 170 | blk.18.ffn_down.weight | 0xfb513400 | 0x2904000 | +| 171 | blk.18.ffn_gate.weight | 0xfde17400 | 0x2904000 | +| 172 | blk.18.ffn_norm.weight | 0x10071b400 | 0x4000 | +| 173 | blk.18.ffn_up.weight | 0x10071f400 | 0x3520000 | +| 174 | blk.19.attn_k.weight | 0x103c3f400 | 0x348000 | +| 175 | blk.19.attn_norm.weight | 0x103f87400 | 0x4000 | +| 176 | blk.19.attn_output.weight | 0x103f8b400 | 0x1100000 | +| 177 | blk.19.attn_q.weight | 0x10508b400 | 0xd20000 | +| 178 | blk.19.attn_v.weight | 0x105dab400 | 0x440000 | +| 179 | blk.19.ffn_down.weight | 0x1061eb400 | 0x2904000 | +| 180 | blk.19.ffn_gate.weight | 0x108aef400 | 0x2904000 | +| 181 | blk.19.ffn_norm.weight | 0x10b3f3400 | 0x4000 | +| 182 | blk.19.ffn_up.weight | 0x10b3f7400 | 0x3520000 | +| 183 | blk.20.attn_k.weight | 0x10e917400 | 0x348000 | +| 184 | blk.20.attn_norm.weight | 0x10ec5f400 | 0x4000 | +| 185 | blk.20.attn_output.weight | 0x10ec63400 | 0x1100000 | +| 186 | blk.20.attn_q.weight | 0x10fd63400 | 0x1100000 | +| 187 | blk.20.attn_v.weight | 0x110e63400 | 0x440000 | +| 188 | blk.20.ffn_down.weight | 0x1112a3400 | 0x2904000 | +| 189 | blk.20.ffn_gate.weight | 0x113ba7400 | 0x2904000 | +| 190 | blk.20.ffn_norm.weight | 0x1164ab400 | 0x4000 | +| 191 | blk.20.ffn_up.weight | 0x1164af400 | 0x3520000 | +| 192 | blk.21.attn_k.weight | 0x1199cf400 | 0x348000 | +| 193 | blk.21.attn_norm.weight | 0x119d17400 | 0x4000 | +| 194 | 
blk.21.attn_output.weight | 0x119d1b400 | 0x1100000 | +| 195 | blk.21.attn_q.weight | 0x11ae1b400 | 0xd20000 | +| 196 | blk.21.attn_v.weight | 0x11bb3b400 | 0x440000 | +| 197 | blk.21.ffn_down.weight | 0x11bf7b400 | 0x2904000 | +| 198 | blk.21.ffn_gate.weight | 0x11e87f400 | 0x2904000 | +| 199 | blk.21.ffn_norm.weight | 0x121183400 | 0x4000 | +| 200 | blk.21.ffn_up.weight | 0x121187400 | 0x3520000 | +| 201 | blk.22.attn_k.weight | 0x1246a7400 | 0x348000 | +| 202 | blk.22.attn_norm.weight | 0x1249ef400 | 0x4000 | +| 203 | blk.22.attn_output.weight | 0x1249f3400 | 0x1100000 | +| 204 | blk.22.attn_q.weight | 0x125af3400 | 0xd20000 | +| 205 | blk.22.attn_v.weight | 0x126813400 | 0x440000 | +| 206 | blk.22.ffn_down.weight | 0x126c53400 | 0x2904000 | +| 207 | blk.22.ffn_gate.weight | 0x129557400 | 0x2904000 | +| 208 | blk.22.ffn_norm.weight | 0x12be5b400 | 0x4000 | +| 209 | blk.22.ffn_up.weight | 0x12be5f400 | 0x3520000 | +| 210 | blk.23.attn_k.weight | 0x12f37f400 | 0x348000 | +| 211 | blk.23.attn_norm.weight | 0x12f6c7400 | 0x4000 | +| 212 | blk.23.attn_output.weight | 0x12f6cb400 | 0x1100000 | +| 213 | blk.23.attn_q.weight | 0x1307cb400 | 0x1100000 | +| 214 | blk.23.attn_v.weight | 0x1318cb400 | 0x440000 | +| 215 | blk.23.ffn_down.weight | 0x131d0b400 | 0x2904000 | +| 216 | blk.23.ffn_gate.weight | 0x13460f400 | 0x2904000 | +| 217 | blk.23.ffn_norm.weight | 0x136f13400 | 0x4000 | +| 218 | blk.23.ffn_up.weight | 0x136f17400 | 0x3520000 | +| 219 | blk.24.attn_k.weight | 0x13a437400 | 0x348000 | +| 220 | blk.24.attn_norm.weight | 0x13a77f400 | 0x4000 | +| 221 | blk.24.attn_output.weight | 0x13a783400 | 0x1100000 | +| 222 | blk.24.attn_q.weight | 0x13b883400 | 0xd20000 | +| 223 | blk.24.attn_v.weight | 0x13c5a3400 | 0x440000 | +| 224 | blk.24.ffn_down.weight | 0x13c9e3400 | 0x2904000 | +| 225 | blk.24.ffn_gate.weight | 0x13f2e7400 | 0x2904000 | +| 226 | blk.24.ffn_norm.weight | 0x141beb400 | 0x4000 | +| 227 | blk.24.ffn_up.weight | 0x141bef400 | 0x3520000 | +| 228 | 
blk.25.attn_k.weight | 0x14510f400 | 0x348000 | +| 229 | blk.25.attn_norm.weight | 0x145457400 | 0x4000 | +| 230 | blk.25.attn_output.weight | 0x14545b400 | 0x1100000 | +| 231 | blk.25.attn_q.weight | 0x14655b400 | 0xd20000 | +| 232 | blk.25.attn_v.weight | 0x14727b400 | 0x440000 | +| 233 | blk.25.ffn_down.weight | 0x1476bb400 | 0x2904000 | +| 234 | blk.25.ffn_gate.weight | 0x149fbf400 | 0x2904000 | +| 235 | blk.25.ffn_norm.weight | 0x14c8c3400 | 0x4000 | +| 236 | blk.25.ffn_up.weight | 0x14c8c7400 | 0x3520000 | +| 237 | blk.26.attn_k.weight | 0x14fde7400 | 0x348000 | +| 238 | blk.26.attn_norm.weight | 0x15012f400 | 0x4000 | +| 239 | blk.26.attn_output.weight | 0x150133400 | 0x1100000 | +| 240 | blk.26.attn_q.weight | 0x151233400 | 0xd20000 | +| 241 | blk.26.attn_v.weight | 0x151f53400 | 0x440000 | +| 242 | blk.26.ffn_down.weight | 0x152393400 | 0x2904000 | +| 243 | blk.26.ffn_gate.weight | 0x154c97400 | 0x2904000 | +| 244 | blk.26.ffn_norm.weight | 0x15759b400 | 0x4000 | +| 245 | blk.26.ffn_up.weight | 0x15759f400 | 0x3520000 | +| 246 | blk.27.attn_k.weight | 0x15aabf400 | 0x348000 | +| 247 | blk.27.attn_norm.weight | 0x15ae07400 | 0x4000 | +| 248 | blk.27.attn_output.weight | 0x15ae0b400 | 0x1100000 | +| 249 | blk.27.attn_q.weight | 0x15bf0b400 | 0x1100000 | +| 250 | blk.27.attn_v.weight | 0x15d00b400 | 0x440000 | +| 251 | blk.27.ffn_down.weight | 0x15d44b400 | 0x2904000 | +| 252 | blk.27.ffn_gate.weight | 0x15fd4f400 | 0x2904000 | +| 253 | blk.27.ffn_norm.weight | 0x162653400 | 0x4000 | +| 254 | blk.27.ffn_up.weight | 0x162657400 | 0x3520000 | +| 255 | blk.28.attn_k.weight | 0x165b77400 | 0x440000 | +| 256 | blk.28.attn_norm.weight | 0x165fb7400 | 0x4000 | +| 257 | blk.28.attn_output.weight | 0x165fbb400 | 0x1100000 | +| 258 | blk.28.attn_q.weight | 0x1670bb400 | 0x1100000 | +| 259 | blk.28.attn_v.weight | 0x1681bb400 | 0x440000 | +| 260 | blk.28.ffn_down.weight | 0x1685fb400 | 0x2904000 | +| 261 | blk.28.ffn_gate.weight | 0x16aeff400 | 0x2904000 | +| 262 | 
blk.28.ffn_norm.weight | 0x16d803400 | 0x4000 | +| 263 | blk.28.ffn_up.weight | 0x16d807400 | 0x3520000 | +| 264 | blk.29.attn_k.weight | 0x170d27400 | 0x348000 | +| 265 | blk.29.attn_norm.weight | 0x17106f400 | 0x4000 | +| 266 | blk.29.attn_output.weight | 0x171073400 | 0x1100000 | +| 267 | blk.29.attn_q.weight | 0x172173400 | 0xd20000 | +| 268 | blk.29.attn_v.weight | 0x172e93400 | 0x440000 | +| 269 | blk.29.ffn_down.weight | 0x1732d3400 | 0x2904000 | +| 270 | blk.29.ffn_gate.weight | 0x175bd7400 | 0x2904000 | +| 271 | blk.29.ffn_norm.weight | 0x1784db400 | 0x4000 | +| 272 | blk.29.ffn_up.weight | 0x1784df400 | 0x3520000 | +| 273 | blk.30.attn_k.weight | 0x17b9ff400 | 0x440000 | +| 274 | blk.30.attn_norm.weight | 0x17be3f400 | 0x4000 | +| 275 | blk.30.attn_output.weight | 0x17be43400 | 0x1100000 | +| 276 | blk.30.attn_q.weight | 0x17cf43400 | 0x1100000 | +| 277 | blk.30.attn_v.weight | 0x17e043400 | 0x440000 | +| 278 | blk.30.ffn_down.weight | 0x17e483400 | 0x2904000 | +| 279 | blk.30.ffn_gate.weight | 0x180d87400 | 0x2904000 | +| 280 | blk.30.ffn_norm.weight | 0x18368b400 | 0x4000 | +| 281 | blk.30.ffn_up.weight | 0x18368f400 | 0x3520000 | +| 282 | blk.31.attn_k.weight | 0x186baf400 | 0x348000 | +| 283 | blk.31.attn_norm.weight | 0x186ef7400 | 0x4000 | +| 284 | blk.31.attn_output.weight | 0x186efb400 | 0x1100000 | +| 285 | blk.31.attn_q.weight | 0x187ffb400 | 0xd20000 | +| 286 | blk.31.attn_v.weight | 0x188d1b400 | 0x440000 | +| 287 | blk.31.ffn_down.weight | 0x18915b400 | 0x2904000 | +| 288 | blk.31.ffn_gate.weight | 0x18ba5f400 | 0x3520000 | +| 289 | blk.31.ffn_norm.weight | 0x18ef7f400 | 0x4000 | +| 290 | blk.31.ffn_up.weight | 0x18ef83400 | 0x3520000 | +| 291 | blk.32.attn_k.weight | 0x1924a3400 | 0x348000 | +| 292 | blk.32.attn_norm.weight | 0x1927eb400 | 0x4000 | +| 293 | blk.32.attn_output.weight | 0x1927ef400 | 0x1100000 | +| 294 | blk.32.attn_q.weight | 0x1938ef400 | 0x1100000 | +| 295 | blk.32.attn_v.weight | 0x1949ef400 | 0x440000 | +| 296 | 
blk.32.ffn_down.weight | 0x194e2f400 | 0x2904000 | +| 297 | blk.32.ffn_gate.weight | 0x197733400 | 0x2904000 | +| 298 | blk.32.ffn_norm.weight | 0x19a037400 | 0x4000 | +| 299 | blk.32.ffn_up.weight | 0x19a03b400 | 0x3520000 | +| 300 | blk.33.attn_k.weight | 0x19d55b400 | 0x348000 | +| 301 | blk.33.attn_norm.weight | 0x19d8a3400 | 0x4000 | +| 302 | blk.33.attn_output.weight | 0x19d8a7400 | 0x1100000 | +| 303 | blk.33.attn_q.weight | 0x19e9a7400 | 0xd20000 | +| 304 | blk.33.attn_v.weight | 0x19f6c7400 | 0x440000 | +| 305 | blk.33.ffn_down.weight | 0x19fb07400 | 0x2904000 | +| 306 | blk.33.ffn_gate.weight | 0x1a240b400 | 0x2904000 | +| 307 | blk.33.ffn_norm.weight | 0x1a4d0f400 | 0x4000 | +| 308 | blk.33.ffn_up.weight | 0x1a4d13400 | 0x3520000 | +| 309 | blk.34.attn_k.weight | 0x1a8233400 | 0x348000 | +| 310 | blk.34.attn_norm.weight | 0x1a857b400 | 0x4000 | +| 311 | blk.34.attn_output.weight | 0x1a857f400 | 0x1100000 | +| 312 | blk.34.attn_q.weight | 0x1a967f400 | 0x1100000 | +| 313 | blk.34.attn_v.weight | 0x1aa77f400 | 0x440000 | +| 314 | blk.34.ffn_down.weight | 0x1aabbf400 | 0x2904000 | +| 315 | blk.34.ffn_gate.weight | 0x1ad4c3400 | 0x2904000 | +| 316 | blk.34.ffn_norm.weight | 0x1afdc7400 | 0x4000 | +| 317 | blk.34.ffn_up.weight | 0x1afdcb400 | 0x3520000 | +| 318 | blk.35.attn_k.weight | 0x1b32eb400 | 0x348000 | +| 319 | blk.35.attn_norm.weight | 0x1b3633400 | 0x4000 | +| 320 | blk.35.attn_output.weight | 0x1b3637400 | 0x1100000 | +| 321 | blk.35.attn_q.weight | 0x1b4737400 | 0xd20000 | +| 322 | blk.35.attn_v.weight | 0x1b5457400 | 0x440000 | +| 323 | blk.35.ffn_down.weight | 0x1b5897400 | 0x2904000 | +| 324 | blk.35.ffn_gate.weight | 0x1b819b400 | 0x2904000 | +| 325 | blk.35.ffn_norm.weight | 0x1baa9f400 | 0x4000 | +| 326 | blk.35.ffn_up.weight | 0x1baaa3400 | 0x3520000 | +| 327 | blk.36.attn_k.weight | 0x1bdfc3400 | 0x348000 | +| 328 | blk.36.attn_norm.weight | 0x1be30b400 | 0x4000 | +| 329 | blk.36.attn_output.weight | 0x1be30f400 | 0x1100000 | +| 330 | 
blk.36.attn_q.weight | 0x1bf40f400 | 0xd20000 | +| 331 | blk.36.attn_v.weight | 0x1c012f400 | 0x440000 | +| 332 | blk.36.ffn_down.weight | 0x1c056f400 | 0x2904000 | +| 333 | blk.36.ffn_gate.weight | 0x1c2e73400 | 0x2904000 | +| 334 | blk.36.ffn_norm.weight | 0x1c5777400 | 0x4000 | +| 335 | blk.36.ffn_up.weight | 0x1c577b400 | 0x3520000 | +| 336 | blk.37.attn_k.weight | 0x1c8c9b400 | 0x348000 | +| 337 | blk.37.attn_norm.weight | 0x1c8fe3400 | 0x4000 | +| 338 | blk.37.attn_output.weight | 0x1c8fe7400 | 0x1100000 | +| 339 | blk.37.attn_q.weight | 0x1ca0e7400 | 0xd20000 | +| 340 | blk.37.attn_v.weight | 0x1cae07400 | 0x440000 | +| 341 | blk.37.ffn_down.weight | 0x1cb247400 | 0x2904000 | +| 342 | blk.37.ffn_gate.weight | 0x1cdb4b400 | 0x3520000 | +| 343 | blk.37.ffn_norm.weight | 0x1d106b400 | 0x4000 | +| 344 | blk.37.ffn_up.weight | 0x1d106f400 | 0x3520000 | +| 345 | blk.38.attn_k.weight | 0x1d458f400 | 0x348000 | +| 346 | blk.38.attn_norm.weight | 0x1d48d7400 | 0x4000 | +| 347 | blk.38.attn_output.weight | 0x1d48db400 | 0x1100000 | +| 348 | blk.38.attn_q.weight | 0x1d59db400 | 0xd20000 | +| 349 | blk.38.attn_v.weight | 0x1d66fb400 | 0x440000 | +| 350 | blk.38.ffn_down.weight | 0x1d6b3b400 | 0x2904000 | +| 351 | blk.38.ffn_gate.weight | 0x1d943f400 | 0x3520000 | +| 352 | blk.38.ffn_norm.weight | 0x1dc95f400 | 0x4000 | +| 353 | blk.38.ffn_up.weight | 0x1dc963400 | 0x3520000 | +| 354 | blk.39.attn_k.weight | 0x1dfe83400 | 0x348000 | +| 355 | blk.39.attn_norm.weight | 0x1e01cb400 | 0x4000 | +| 356 | blk.39.attn_output.weight | 0x1e01cf400 | 0x1100000 | +| 357 | blk.39.attn_q.weight | 0x1e12cf400 | 0xd20000 | +| 358 | blk.39.attn_v.weight | 0x1e1fef400 | 0x440000 | +| 359 | blk.39.ffn_down.weight | 0x1e242f400 | 0x2904000 | +| 360 | blk.39.ffn_gate.weight | 0x1e4d33400 | 0x3520000 | +| 361 | blk.39.ffn_norm.weight | 0x1e8253400 | 0x4000 | +| 362 | blk.39.ffn_up.weight | 0x1e8257400 | 0x3520000 | + +### Base Tensor Group : ~822M Elements + +| T_ID | Tensor Layer Name | 
Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------|:---------------------------------|:------------------|:----------------------|:-----|--------:| +| 0 | output.weight | Output (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q8_0 | 8.5000 | +| 1 | output_norm.weight | Output Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 2 | token_embd.weight | Token Embedding (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in base: (~822M) 822087680 +- Percentage of total elements: 9.35% +- Bits per Weight (BPW) for base: 8.5001 bits + + +### Block 0 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 3 | blk.0.attn_k.weight | Block 0 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 4 | blk.0.attn_norm.weight | Block 0 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 5 | blk.0.attn_output.weight | Block 0 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 6 | blk.0.attn_q.weight | Block 0 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q5_1 | 6.0000 | +| 7 | blk.0.attn_v.weight | Block 0 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 8 | blk.0.ffn_down.weight | Block 0 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_1 | 6.0000 | +| 9 | blk.0.ffn_gate.weight | Block 0 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 10 | blk.0.ffn_norm.weight | Block 0 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 11 | blk.0.ffn_up.weight | Block 0 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 
x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.0: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.0: 6.9188 bits + + +### Block 1 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 12 | blk.1.attn_k.weight | Block 1 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 13 | blk.1.attn_norm.weight | Block 1 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 14 | blk.1.attn_output.weight | Block 1 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 15 | blk.1.attn_q.weight | Block 1 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 16 | blk.1.attn_v.weight | Block 1 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 17 | blk.1.ffn_down.weight | Block 1 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 18 | blk.1.ffn_gate.weight | Block 1 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 19 | blk.1.ffn_norm.weight | Block 1 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 20 | blk.1.ffn_up.weight | Block 1 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.1: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.1: 7.2773 bits + + +### Block 2 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 21 | 
blk.2.attn_k.weight | Block 2 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 22 | blk.2.attn_norm.weight | Block 2 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 23 | blk.2.attn_output.weight | Block 2 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 24 | blk.2.attn_q.weight | Block 2 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 25 | blk.2.attn_v.weight | Block 2 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 26 | blk.2.ffn_down.weight | Block 2 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 27 | blk.2.ffn_gate.weight | Block 2 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 28 | blk.2.ffn_norm.weight | Block 2 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 29 | blk.2.ffn_up.weight | Block 2 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.2: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.2: 7.2773 bits + + +### Block 3 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 30 | blk.3.attn_k.weight | Block 3 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 31 | blk.3.attn_norm.weight | Block 3 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 32 | blk.3.attn_output.weight | Block 3 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 33 | blk.3.attn_q.weight | Block 3 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 34 
| blk.3.attn_v.weight | Block 3 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 35 | blk.3.ffn_down.weight | Block 3 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_1 | 6.0000 | +| 36 | blk.3.ffn_gate.weight | Block 3 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 37 | blk.3.ffn_norm.weight | Block 3 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 38 | blk.3.ffn_up.weight | Block 3 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.3: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.3: 7.6392 bits + + +### Block 4 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 39 | blk.4.attn_k.weight | Block 4 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 40 | blk.4.attn_norm.weight | Block 4 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 41 | blk.4.attn_output.weight | Block 4 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 42 | blk.4.attn_q.weight | Block 4 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 43 | blk.4.attn_v.weight | Block 4 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 44 | blk.4.ffn_down.weight | Block 4 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 45 | blk.4.ffn_gate.weight | Block 4 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 46 | blk.4.ffn_norm.weight | Block 4 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 
4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 47 | blk.4.ffn_up.weight | Block 4 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.4: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.4: 7.4405 bits + + +### Block 5 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 48 | blk.5.attn_k.weight | Block 5 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 49 | blk.5.attn_norm.weight | Block 5 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 50 | blk.5.attn_output.weight | Block 5 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 51 | blk.5.attn_q.weight | Block 5 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 52 | blk.5.attn_v.weight | Block 5 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 53 | blk.5.ffn_down.weight | Block 5 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_1 | 6.0000 | +| 54 | blk.5.ffn_gate.weight | Block 5 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 55 | blk.5.ffn_norm.weight | Block 5 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 56 | blk.5.ffn_up.weight | Block 5 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.5: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.5: 7.8023 bits + + +### Block 6 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 57 | blk.6.attn_k.weight | Block 6 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 58 | blk.6.attn_norm.weight | Block 6 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 59 | blk.6.attn_output.weight | Block 6 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 60 | blk.6.attn_q.weight | Block 6 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 61 | blk.6.attn_v.weight | Block 6 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 62 | blk.6.ffn_down.weight | Block 6 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 63 | blk.6.ffn_gate.weight | Block 6 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 64 | blk.6.ffn_norm.weight | Block 6 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 65 | blk.6.ffn_up.weight | Block 6 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.6: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.6: 7.4813 bits + + +### Block 7 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 66 | blk.7.attn_k.weight | Block 7 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 67 | blk.7.attn_norm.weight | Block 7 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 68 | blk.7.attn_output.weight | Block 7 Attention Output (W) | (~17M) 16777216 | 4096 x 
4096 x 1 x 1 | Q8_0 | 8.5000 | +| 69 | blk.7.attn_q.weight | Block 7 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 70 | blk.7.attn_v.weight | Block 7 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 71 | blk.7.ffn_down.weight | Block 7 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 72 | blk.7.ffn_gate.weight | Block 7 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 73 | blk.7.ffn_norm.weight | Block 7 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 74 | blk.7.ffn_up.weight | Block 7 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.7: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.7: 7.4813 bits + + +### Block 8 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 75 | blk.8.attn_k.weight | Block 8 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 76 | blk.8.attn_norm.weight | Block 8 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 77 | blk.8.attn_output.weight | Block 8 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 78 | blk.8.attn_q.weight | Block 8 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 79 | blk.8.attn_v.weight | Block 8 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 80 | blk.8.ffn_down.weight | Block 8 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 81 | blk.8.ffn_gate.weight | Block 8 Feed-Forward Network "Gate" (W) | 
(~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 82 | blk.8.ffn_norm.weight | Block 8 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 83 | blk.8.ffn_up.weight | Block 8 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.8: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.8: 7.2773 bits + + +### Block 9 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 84 | blk.9.attn_k.weight | Block 9 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 85 | blk.9.attn_norm.weight | Block 9 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 86 | blk.9.attn_output.weight | Block 9 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 87 | blk.9.attn_q.weight | Block 9 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 88 | blk.9.attn_v.weight | Block 9 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 89 | blk.9.ffn_down.weight | Block 9 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 90 | blk.9.ffn_gate.weight | Block 9 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 91 | blk.9.ffn_norm.weight | Block 9 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 92 | blk.9.ffn_up.weight | Block 9 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.9: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.9: 7.2773 bits + + +### Block 10 
Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 93 | blk.10.attn_k.weight | Block 10 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 94 | blk.10.attn_norm.weight | Block 10 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 95 | blk.10.attn_output.weight | Block 10 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 96 | blk.10.attn_q.weight | Block 10 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 97 | blk.10.attn_v.weight | Block 10 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 98 | blk.10.ffn_down.weight | Block 10 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 99 | blk.10.ffn_gate.weight | Block 10 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 100 | blk.10.ffn_norm.weight | Block 10 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 101 | blk.10.ffn_up.weight | Block 10 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.10: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.10: 7.2773 bits + + +### Block 11 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 102 | blk.11.attn_k.weight | Block 11 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 103 | blk.11.attn_norm.weight | Block 11 
Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 104 | blk.11.attn_output.weight | Block 11 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 105 | blk.11.attn_q.weight | Block 11 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 106 | blk.11.attn_v.weight | Block 11 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 107 | blk.11.ffn_down.weight | Block 11 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 108 | blk.11.ffn_gate.weight | Block 11 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 109 | blk.11.ffn_norm.weight | Block 11 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 110 | blk.11.ffn_up.weight | Block 11 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.11: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.11: 7.2773 bits + + +### Block 12 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 111 | blk.12.attn_k.weight | Block 12 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 112 | blk.12.attn_norm.weight | Block 12 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 113 | blk.12.attn_output.weight | Block 12 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 114 | blk.12.attn_q.weight | Block 12 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 115 | blk.12.attn_v.weight | Block 12 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 
8.5000 | +| 116 | blk.12.ffn_down.weight | Block 12 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 117 | blk.12.ffn_gate.weight | Block 12 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 118 | blk.12.ffn_norm.weight | Block 12 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 119 | blk.12.ffn_up.weight | Block 12 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.12: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.12: 7.2773 bits + + +### Block 13 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 120 | blk.13.attn_k.weight | Block 13 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 121 | blk.13.attn_norm.weight | Block 13 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 122 | blk.13.attn_output.weight | Block 13 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 123 | blk.13.attn_q.weight | Block 13 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 124 | blk.13.attn_v.weight | Block 13 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 125 | blk.13.ffn_down.weight | Block 13 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 126 | blk.13.ffn_gate.weight | Block 13 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 127 | blk.13.ffn_norm.weight | Block 13 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 128 | 
blk.13.ffn_up.weight | Block 13 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.13: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.13: 7.2773 bits + + +### Block 14 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 129 | blk.14.attn_k.weight | Block 14 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 130 | blk.14.attn_norm.weight | Block 14 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 131 | blk.14.attn_output.weight | Block 14 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 132 | blk.14.attn_q.weight | Block 14 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 133 | blk.14.attn_v.weight | Block 14 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 134 | blk.14.ffn_down.weight | Block 14 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 135 | blk.14.ffn_gate.weight | Block 14 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 136 | blk.14.ffn_norm.weight | Block 14 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 137 | blk.14.ffn_up.weight | Block 14 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.14: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.14: 7.2773 bits + + +### Block 15 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 138 | blk.15.attn_k.weight | Block 15 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 139 | blk.15.attn_norm.weight | Block 15 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 140 | blk.15.attn_output.weight | Block 15 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 141 | blk.15.attn_q.weight | Block 15 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 142 | blk.15.attn_v.weight | Block 15 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 143 | blk.15.ffn_down.weight | Block 15 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 144 | blk.15.ffn_gate.weight | Block 15 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 145 | blk.15.ffn_norm.weight | Block 15 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 146 | blk.15.ffn_up.weight | Block 15 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.15: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.15: 7.4813 bits + + +### Block 16 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 147 | blk.16.attn_k.weight | Block 16 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 148 | blk.16.attn_norm.weight | Block 16 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 149 | blk.16.attn_output.weight | Block 16 
Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 150 | blk.16.attn_q.weight | Block 16 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 151 | blk.16.attn_v.weight | Block 16 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 152 | blk.16.ffn_down.weight | Block 16 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 153 | blk.16.ffn_gate.weight | Block 16 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 154 | blk.16.ffn_norm.weight | Block 16 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 155 | blk.16.ffn_up.weight | Block 16 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.16: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.16: 7.4813 bits + + +### Block 17 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 156 | blk.17.attn_k.weight | Block 17 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 157 | blk.17.attn_norm.weight | Block 17 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 158 | blk.17.attn_output.weight | Block 17 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 159 | blk.17.attn_q.weight | Block 17 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 160 | blk.17.attn_v.weight | Block 17 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 161 | blk.17.ffn_down.weight | Block 17 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | 
Q6_K | 6.5625 | +| 162 | blk.17.ffn_gate.weight | Block 17 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 163 | blk.17.ffn_norm.weight | Block 17 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 164 | blk.17.ffn_up.weight | Block 17 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.17: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.17: 7.2773 bits + + +### Block 18 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 165 | blk.18.attn_k.weight | Block 18 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 166 | blk.18.attn_norm.weight | Block 18 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 167 | blk.18.attn_output.weight | Block 18 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 168 | blk.18.attn_q.weight | Block 18 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 169 | blk.18.attn_v.weight | Block 18 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 170 | blk.18.ffn_down.weight | Block 18 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 171 | blk.18.ffn_gate.weight | Block 18 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 172 | blk.18.ffn_norm.weight | Block 18 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 173 | blk.18.ffn_up.weight | Block 18 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in 
blk.18: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.18: 7.2773 bits + + +### Block 19 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 174 | blk.19.attn_k.weight | Block 19 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 175 | blk.19.attn_norm.weight | Block 19 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 176 | blk.19.attn_output.weight | Block 19 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 177 | blk.19.attn_q.weight | Block 19 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 178 | blk.19.attn_v.weight | Block 19 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 179 | blk.19.ffn_down.weight | Block 19 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 180 | blk.19.ffn_gate.weight | Block 19 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 181 | blk.19.ffn_norm.weight | Block 19 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 182 | blk.19.ffn_up.weight | Block 19 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.19: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.19: 7.2773 bits + + +### Block 20 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 183 | 
blk.20.attn_k.weight | Block 20 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 184 | blk.20.attn_norm.weight | Block 20 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 185 | blk.20.attn_output.weight | Block 20 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 186 | blk.20.attn_q.weight | Block 20 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 187 | blk.20.attn_v.weight | Block 20 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 188 | blk.20.ffn_down.weight | Block 20 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 189 | blk.20.ffn_gate.weight | Block 20 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 190 | blk.20.ffn_norm.weight | Block 20 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 191 | blk.20.ffn_up.weight | Block 20 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.20: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.20: 7.4405 bits + + +### Block 21 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 192 | blk.21.attn_k.weight | Block 21 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 193 | blk.21.attn_norm.weight | Block 21 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 194 | blk.21.attn_output.weight | Block 21 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 195 | blk.21.attn_q.weight | Block 21 Attention Query (W) | (~17M) 16777216 | 
4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 196 | blk.21.attn_v.weight | Block 21 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 197 | blk.21.ffn_down.weight | Block 21 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 198 | blk.21.ffn_gate.weight | Block 21 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 199 | blk.21.ffn_norm.weight | Block 21 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 200 | blk.21.ffn_up.weight | Block 21 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.21: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.21: 7.2773 bits + + +### Block 22 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 201 | blk.22.attn_k.weight | Block 22 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 202 | blk.22.attn_norm.weight | Block 22 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 203 | blk.22.attn_output.weight | Block 22 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 204 | blk.22.attn_q.weight | Block 22 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 205 | blk.22.attn_v.weight | Block 22 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 206 | blk.22.ffn_down.weight | Block 22 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 207 | blk.22.ffn_gate.weight | Block 22 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 208 | 
blk.22.ffn_norm.weight | Block 22 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 209 | blk.22.ffn_up.weight | Block 22 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.22: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.22: 7.2773 bits + + +### Block 23 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 210 | blk.23.attn_k.weight | Block 23 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 211 | blk.23.attn_norm.weight | Block 23 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 212 | blk.23.attn_output.weight | Block 23 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 213 | blk.23.attn_q.weight | Block 23 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 214 | blk.23.attn_v.weight | Block 23 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 215 | blk.23.ffn_down.weight | Block 23 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 216 | blk.23.ffn_gate.weight | Block 23 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 217 | blk.23.ffn_norm.weight | Block 23 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 218 | blk.23.ffn_up.weight | Block 23 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.23: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.23: 7.4405 bits + + +### Block 24 Tensor Group : ~199M 
Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 219 | blk.24.attn_k.weight | Block 24 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 220 | blk.24.attn_norm.weight | Block 24 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 221 | blk.24.attn_output.weight | Block 24 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 222 | blk.24.attn_q.weight | Block 24 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 223 | blk.24.attn_v.weight | Block 24 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 224 | blk.24.ffn_down.weight | Block 24 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 225 | blk.24.ffn_gate.weight | Block 24 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 226 | blk.24.ffn_norm.weight | Block 24 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 227 | blk.24.ffn_up.weight | Block 24 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.24: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.24: 7.2773 bits + + +### Block 25 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 228 | blk.25.attn_k.weight | Block 25 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 229 | blk.25.attn_norm.weight | Block 25 Attention 
Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 230 | blk.25.attn_output.weight | Block 25 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 231 | blk.25.attn_q.weight | Block 25 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 232 | blk.25.attn_v.weight | Block 25 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 233 | blk.25.ffn_down.weight | Block 25 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 234 | blk.25.ffn_gate.weight | Block 25 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 235 | blk.25.ffn_norm.weight | Block 25 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 236 | blk.25.ffn_up.weight | Block 25 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.25: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.25: 7.2773 bits + + +### Block 26 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 237 | blk.26.attn_k.weight | Block 26 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 238 | blk.26.attn_norm.weight | Block 26 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 239 | blk.26.attn_output.weight | Block 26 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 240 | blk.26.attn_q.weight | Block 26 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 241 | blk.26.attn_v.weight | Block 26 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 
242 | blk.26.ffn_down.weight | Block 26 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 243 | blk.26.ffn_gate.weight | Block 26 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 244 | blk.26.ffn_norm.weight | Block 26 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 245 | blk.26.ffn_up.weight | Block 26 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.26: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.26: 7.2773 bits + + +### Block 27 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 246 | blk.27.attn_k.weight | Block 27 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 247 | blk.27.attn_norm.weight | Block 27 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 248 | blk.27.attn_output.weight | Block 27 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 249 | blk.27.attn_q.weight | Block 27 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 250 | blk.27.attn_v.weight | Block 27 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 251 | blk.27.ffn_down.weight | Block 27 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 252 | blk.27.ffn_gate.weight | Block 27 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 253 | blk.27.ffn_norm.weight | Block 27 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 254 | blk.27.ffn_up.weight | Block 
27 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.27: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.27: 7.4405 bits + + +### Block 28 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 255 | blk.28.attn_k.weight | Block 28 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 256 | blk.28.attn_norm.weight | Block 28 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 257 | blk.28.attn_output.weight | Block 28 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 258 | blk.28.attn_q.weight | Block 28 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 259 | blk.28.attn_v.weight | Block 28 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 260 | blk.28.ffn_down.weight | Block 28 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 261 | blk.28.ffn_gate.weight | Block 28 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 262 | blk.28.ffn_norm.weight | Block 28 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 263 | blk.28.ffn_up.weight | Block 28 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.28: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.28: 7.4813 bits + + +### Block 29 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 264 | blk.29.attn_k.weight | Block 29 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 265 | blk.29.attn_norm.weight | Block 29 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 266 | blk.29.attn_output.weight | Block 29 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 267 | blk.29.attn_q.weight | Block 29 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 268 | blk.29.attn_v.weight | Block 29 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 269 | blk.29.ffn_down.weight | Block 29 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 270 | blk.29.ffn_gate.weight | Block 29 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 271 | blk.29.ffn_norm.weight | Block 29 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 272 | blk.29.ffn_up.weight | Block 29 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.29: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.29: 7.2773 bits + + +### Block 30 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 273 | blk.30.attn_k.weight | Block 30 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 274 | blk.30.attn_norm.weight | Block 30 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 275 | blk.30.attn_output.weight | Block 30 
Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 276 | blk.30.attn_q.weight | Block 30 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 277 | blk.30.attn_v.weight | Block 30 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 278 | blk.30.ffn_down.weight | Block 30 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 279 | blk.30.ffn_gate.weight | Block 30 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 280 | blk.30.ffn_norm.weight | Block 30 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 281 | blk.30.ffn_up.weight | Block 30 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.30: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.30: 7.4813 bits + + +### Block 31 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 282 | blk.31.attn_k.weight | Block 31 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 283 | blk.31.attn_norm.weight | Block 31 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 284 | blk.31.attn_output.weight | Block 31 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 285 | blk.31.attn_q.weight | Block 31 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 286 | blk.31.attn_v.weight | Block 31 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 287 | blk.31.ffn_down.weight | Block 31 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | 
Q6_K | 6.5625 | +| 288 | blk.31.ffn_gate.weight | Block 31 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 289 | blk.31.ffn_norm.weight | Block 31 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 290 | blk.31.ffn_up.weight | Block 31 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.31: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.31: 7.7872 bits + + +### Block 32 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 291 | blk.32.attn_k.weight | Block 32 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 292 | blk.32.attn_norm.weight | Block 32 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 293 | blk.32.attn_output.weight | Block 32 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 294 | blk.32.attn_q.weight | Block 32 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 295 | blk.32.attn_v.weight | Block 32 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 296 | blk.32.ffn_down.weight | Block 32 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 297 | blk.32.ffn_gate.weight | Block 32 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 298 | blk.32.ffn_norm.weight | Block 32 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 299 | blk.32.ffn_up.weight | Block 32 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in 
blk.32: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.32: 7.4405 bits + + +### Block 33 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 300 | blk.33.attn_k.weight | Block 33 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 301 | blk.33.attn_norm.weight | Block 33 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 302 | blk.33.attn_output.weight | Block 33 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 303 | blk.33.attn_q.weight | Block 33 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 304 | blk.33.attn_v.weight | Block 33 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 305 | blk.33.ffn_down.weight | Block 33 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 306 | blk.33.ffn_gate.weight | Block 33 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 307 | blk.33.ffn_norm.weight | Block 33 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 308 | blk.33.ffn_up.weight | Block 33 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.33: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.33: 7.2773 bits + + +### Block 34 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 309 | 
blk.34.attn_k.weight | Block 34 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 310 | blk.34.attn_norm.weight | Block 34 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 311 | blk.34.attn_output.weight | Block 34 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 312 | blk.34.attn_q.weight | Block 34 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 313 | blk.34.attn_v.weight | Block 34 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 314 | blk.34.ffn_down.weight | Block 34 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 315 | blk.34.ffn_gate.weight | Block 34 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 316 | blk.34.ffn_norm.weight | Block 34 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 317 | blk.34.ffn_up.weight | Block 34 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.34: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.34: 7.4405 bits + + +### Block 35 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 318 | blk.35.attn_k.weight | Block 35 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 319 | blk.35.attn_norm.weight | Block 35 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 320 | blk.35.attn_output.weight | Block 35 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 321 | blk.35.attn_q.weight | Block 35 Attention Query (W) | (~17M) 16777216 | 
4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 322 | blk.35.attn_v.weight | Block 35 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 323 | blk.35.ffn_down.weight | Block 35 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 324 | blk.35.ffn_gate.weight | Block 35 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 325 | blk.35.ffn_norm.weight | Block 35 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 326 | blk.35.ffn_up.weight | Block 35 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.35: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.35: 7.2773 bits + + +### Block 36 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 327 | blk.36.attn_k.weight | Block 36 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 328 | blk.36.attn_norm.weight | Block 36 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 329 | blk.36.attn_output.weight | Block 36 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 330 | blk.36.attn_q.weight | Block 36 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 331 | blk.36.attn_v.weight | Block 36 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 332 | blk.36.ffn_down.weight | Block 36 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 333 | blk.36.ffn_gate.weight | Block 36 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 334 | 
blk.36.ffn_norm.weight | Block 36 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 335 | blk.36.ffn_up.weight | Block 36 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.36: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.36: 7.2773 bits + + +### Block 37 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 336 | blk.37.attn_k.weight | Block 37 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 337 | blk.37.attn_norm.weight | Block 37 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 338 | blk.37.attn_output.weight | Block 37 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 339 | blk.37.attn_q.weight | Block 37 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 340 | blk.37.attn_v.weight | Block 37 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 341 | blk.37.ffn_down.weight | Block 37 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 342 | blk.37.ffn_gate.weight | Block 37 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 343 | blk.37.ffn_norm.weight | Block 37 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 344 | blk.37.ffn_up.weight | Block 37 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.37: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.37: 7.7872 bits + + +### Block 38 Tensor Group : ~199M 
Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 345 | blk.38.attn_k.weight | Block 38 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 346 | blk.38.attn_norm.weight | Block 38 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 347 | blk.38.attn_output.weight | Block 38 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 348 | blk.38.attn_q.weight | Block 38 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 349 | blk.38.attn_v.weight | Block 38 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 350 | blk.38.ffn_down.weight | Block 38 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 351 | blk.38.ffn_gate.weight | Block 38 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 352 | blk.38.ffn_norm.weight | Block 38 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 353 | blk.38.ffn_up.weight | Block 38 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.38: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.38: 7.7872 bits + + +### Block 39 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 354 | blk.39.attn_k.weight | Block 39 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q6_K | 6.5625 | +| 355 | blk.39.attn_norm.weight | Block 39 Attention 
Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 356 | blk.39.attn_output.weight | Block 39 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 357 | blk.39.attn_q.weight | Block 39 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 358 | blk.39.attn_v.weight | Block 39 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 359 | blk.39.ffn_down.weight | Block 39 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 360 | blk.39.ffn_gate.weight | Block 39 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 361 | blk.39.ffn_norm.weight | Block 39 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 362 | blk.39.ffn_up.weight | Block 39 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.39: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.39: 7.7872 bits + + +Total BPW for granite-4.1-8b-Q7_K.gguf: 7.4998 bits diff --git a/scores/granite-4.1-8b-Q8_0.md b/scores/granite-4.1-8b-Q8_0.md new file mode 100644 index 0000000..87c7c52 --- /dev/null +++ b/scores/granite-4.1-8b-Q8_0.md @@ -0,0 +1,1247 @@ +# granite-4.1-8b-WIP/granite-4.1-8b-Q8_0.gguf - GGUF Internal File Dump + +- Endian: LITTLE endian + +## Key Value Metadata Store + +There are 42 key-value pairs in this file + +| POS | TYPE | Count | Key | Value | +|----:|:---------|-------:|:-----------------------------------------|:--------------------------------------------------------------------| +| 1 | UINT32 | 1 | GGUF.version | 3 | +| 2 | UINT64 | 1 | GGUF.tensor_count | 363 | +| 3 | UINT64 | 1 | GGUF.kv_count | 39 | +| 4 | STRING | 1 | general.architecture | `granite` | +| 5 | STRING | 1 | general.type | `model` | +| 6 | STRING | 1 | general.name | `Granite 4.1 8b` | +| 7 | STRING | 1 
| general.basename | `granite-4.1` | +| 8 | STRING | 1 | general.size_label | `8B` | +| 9 | STRING | 1 | general.license | `apache-2.0` | +| 10 | [STRING] | 2 | general.tags | [ `language`, `granite-4.1` ] | +| 11 | UINT32 | 1 | granite.block_count | 40 | +| 12 | UINT32 | 1 | granite.context_length | 131072 | +| 13 | UINT32 | 1 | granite.embedding_length | 4096 | +| 14 | UINT32 | 1 | granite.feed_forward_length | 12800 | +| 15 | UINT32 | 1 | granite.attention.head_count | 32 | +| 16 | UINT32 | 1 | granite.attention.head_count_kv | 8 | +| 17 | FLOAT32 | 1 | granite.rope.freq_base | 10000000.0 | +| 18 | FLOAT32 | 1 | granite.attention.layer_norm_rms_epsilon | 1e-05 | +| 19 | UINT32 | 1 | granite.vocab_size | 100352 | +| 20 | UINT32 | 1 | granite.rope.dimension_count | 128 | +| 21 | FLOAT32 | 1 | granite.attention.scale | 0.0078125 | +| 22 | FLOAT32 | 1 | granite.embedding_scale | 12.0 | +| 23 | FLOAT32 | 1 | granite.residual_scale | 0.22 | +| 24 | FLOAT32 | 1 | granite.logit_scale | 16.0 | +| 25 | STRING | 1 | tokenizer.ggml.model | `gpt2` | +| 26 | STRING | 1 | tokenizer.ggml.pre | `granite-docling` | +| 27 | [STRING] | 100352 | tokenizer.ggml.tokens | [ `!`, `"`, `#`, `$`, `%`, ... ] | +| 28 | [INT32] | 100352 | tokenizer.ggml.token_type | [ 1, 1, 1, 1, 1, 1, 1, ... ] | +| 29 | [STRING] | 100000 | tokenizer.ggml.merges | [ `Ġ Ġ`, `ĠĠ ĠĠ`, `i n`, `Ġ t`, `ĠĠĠĠ ĠĠĠĠ`, ... 
] | +| 30 | UINT32 | 1 | tokenizer.ggml.bos_token_id | 100257 | +| 31 | UINT32 | 1 | tokenizer.ggml.eos_token_id | 100257 | +| 32 | UINT32 | 1 | tokenizer.ggml.unknown_token_id | 100269 | +| 33 | UINT32 | 1 | tokenizer.ggml.padding_token_id | 100256 | +| 34 | BOOL | 1 | tokenizer.ggml.add_bos_token | False | +| 35 | STRING | 1 | tokenizer.chat_template | `{%- set tools_system_message_p`...`end_of_role|>' }}{%- endif %}` | +| 36 | BOOL | 1 | tokenizer.ggml.add_space_prefix | False | +| 37 | UINT32 | 1 | general.quantization_version | 2 | +| 38 | UINT32 | 1 | general.file_type | 7 | +| 39 | STRING | 1 | quantize.imatrix.file | `imatrix/imatrix-granite-4.1-8b-medium.gguf` | +| 40 | STRING | 1 | quantize.imatrix.dataset | `../../datasets/imatrix/combined_all_medium.txt` | +| 41 | UINT32 | 1 | quantize.imatrix.entries_count | 280 | +| 42 | UINT32 | 1 | quantize.imatrix.chunks_count | 3601 | + +## Tensors Overview ~9B Elements + +Total number of elements in all tensors: 8791592960 Elements + +- [granite-4.1-8b-WIP/granite-4.1-8b-Q8\_0.gguf - GGUF Internal File Dump](#granite-41-8b-wipgranite-41-8b-q8_0gguf---gguf-internal-file-dump) + - [Key Value Metadata Store](#key-value-metadata-store) + - [Tensors Overview ~9B Elements](#tensors-overview-9b-elements) + - [Tensor Data Offset](#tensor-data-offset) + - [Base Tensor Group : ~822M Elements](#base-tensor-group--822m-elements) + - [Block 0 Tensor Group : ~199M Elements](#block-0-tensor-group--199m-elements) + - [Block 1 Tensor Group : ~199M Elements](#block-1-tensor-group--199m-elements) + - [Block 2 Tensor Group : ~199M Elements](#block-2-tensor-group--199m-elements) + - [Block 3 Tensor Group : ~199M Elements](#block-3-tensor-group--199m-elements) + - [Block 4 Tensor Group : ~199M Elements](#block-4-tensor-group--199m-elements) + - [Block 5 Tensor Group : ~199M Elements](#block-5-tensor-group--199m-elements) + - [Block 6 Tensor Group : ~199M Elements](#block-6-tensor-group--199m-elements) + - [Block 7 Tensor Group : ~199M 
Elements](#block-7-tensor-group--199m-elements) + - [Block 8 Tensor Group : ~199M Elements](#block-8-tensor-group--199m-elements) + - [Block 9 Tensor Group : ~199M Elements](#block-9-tensor-group--199m-elements) + - [Block 10 Tensor Group : ~199M Elements](#block-10-tensor-group--199m-elements) + - [Block 11 Tensor Group : ~199M Elements](#block-11-tensor-group--199m-elements) + - [Block 12 Tensor Group : ~199M Elements](#block-12-tensor-group--199m-elements) + - [Block 13 Tensor Group : ~199M Elements](#block-13-tensor-group--199m-elements) + - [Block 14 Tensor Group : ~199M Elements](#block-14-tensor-group--199m-elements) + - [Block 15 Tensor Group : ~199M Elements](#block-15-tensor-group--199m-elements) + - [Block 16 Tensor Group : ~199M Elements](#block-16-tensor-group--199m-elements) + - [Block 17 Tensor Group : ~199M Elements](#block-17-tensor-group--199m-elements) + - [Block 18 Tensor Group : ~199M Elements](#block-18-tensor-group--199m-elements) + - [Block 19 Tensor Group : ~199M Elements](#block-19-tensor-group--199m-elements) + - [Block 20 Tensor Group : ~199M Elements](#block-20-tensor-group--199m-elements) + - [Block 21 Tensor Group : ~199M Elements](#block-21-tensor-group--199m-elements) + - [Block 22 Tensor Group : ~199M Elements](#block-22-tensor-group--199m-elements) + - [Block 23 Tensor Group : ~199M Elements](#block-23-tensor-group--199m-elements) + - [Block 24 Tensor Group : ~199M Elements](#block-24-tensor-group--199m-elements) + - [Block 25 Tensor Group : ~199M Elements](#block-25-tensor-group--199m-elements) + - [Block 26 Tensor Group : ~199M Elements](#block-26-tensor-group--199m-elements) + - [Block 27 Tensor Group : ~199M Elements](#block-27-tensor-group--199m-elements) + - [Block 28 Tensor Group : ~199M Elements](#block-28-tensor-group--199m-elements) + - [Block 29 Tensor Group : ~199M Elements](#block-29-tensor-group--199m-elements) + - [Block 30 Tensor Group : ~199M Elements](#block-30-tensor-group--199m-elements) + - [Block 31 Tensor 
Group : ~199M Elements](#block-31-tensor-group--199m-elements) + - [Block 32 Tensor Group : ~199M Elements](#block-32-tensor-group--199m-elements) + - [Block 33 Tensor Group : ~199M Elements](#block-33-tensor-group--199m-elements) + - [Block 34 Tensor Group : ~199M Elements](#block-34-tensor-group--199m-elements) + - [Block 35 Tensor Group : ~199M Elements](#block-35-tensor-group--199m-elements) + - [Block 36 Tensor Group : ~199M Elements](#block-36-tensor-group--199m-elements) + - [Block 37 Tensor Group : ~199M Elements](#block-37-tensor-group--199m-elements) + - [Block 38 Tensor Group : ~199M Elements](#block-38-tensor-group--199m-elements) + - [Block 39 Tensor Group : ~199M Elements](#block-39-tensor-group--199m-elements) + +### Tensor Data Offset + +This table contains the offset and data segment relative to start of file + +| T_ID | Tensor Layer Name | Data Offset (B) | Data Size (B) | +|-----:|:--------------------------|-----------------:|-----------------:| +| 0 | output.weight | 0x367400 | 0x1a080000 | +| 1 | output_norm.weight | 0x1a3e7400 | 0x4000 | +| 2 | token_embd.weight | 0x1a3eb400 | 0x1a080000 | +| 3 | blk.0.attn_k.weight | 0x3446b400 | 0x440000 | +| 4 | blk.0.attn_norm.weight | 0x348ab400 | 0x4000 | +| 5 | blk.0.attn_output.weight | 0x348af400 | 0x1100000 | +| 6 | blk.0.attn_q.weight | 0x359af400 | 0xd20000 | +| 7 | blk.0.attn_v.weight | 0x366cf400 | 0x440000 | +| 8 | blk.0.ffn_down.weight | 0x36b0f400 | 0x3520000 | +| 9 | blk.0.ffn_gate.weight | 0x3a02f400 | 0x2904000 | +| 10 | blk.0.ffn_norm.weight | 0x3c933400 | 0x4000 | +| 11 | blk.0.ffn_up.weight | 0x3c937400 | 0x3520000 | +| 12 | blk.1.attn_k.weight | 0x3fe57400 | 0x440000 | +| 13 | blk.1.attn_norm.weight | 0x40297400 | 0x4000 | +| 14 | blk.1.attn_output.weight | 0x4029b400 | 0x1100000 | +| 15 | blk.1.attn_q.weight | 0x4139b400 | 0x1100000 | +| 16 | blk.1.attn_v.weight | 0x4249b400 | 0x440000 | +| 17 | blk.1.ffn_down.weight | 0x428db400 | 0x3520000 | +| 18 | blk.1.ffn_gate.weight | 
0x45dfb400 | 0x3520000 | +| 19 | blk.1.ffn_norm.weight | 0x4931b400 | 0x4000 | +| 20 | blk.1.ffn_up.weight | 0x4931f400 | 0x3520000 | +| 21 | blk.2.attn_k.weight | 0x4c83f400 | 0x440000 | +| 22 | blk.2.attn_norm.weight | 0x4cc7f400 | 0x4000 | +| 23 | blk.2.attn_output.weight | 0x4cc83400 | 0x1100000 | +| 24 | blk.2.attn_q.weight | 0x4dd83400 | 0x1100000 | +| 25 | blk.2.attn_v.weight | 0x4ee83400 | 0x440000 | +| 26 | blk.2.ffn_down.weight | 0x4f2c3400 | 0x3520000 | +| 27 | blk.2.ffn_gate.weight | 0x527e3400 | 0x3520000 | +| 28 | blk.2.ffn_norm.weight | 0x55d03400 | 0x4000 | +| 29 | blk.2.ffn_up.weight | 0x55d07400 | 0x3520000 | +| 30 | blk.3.attn_k.weight | 0x59227400 | 0x440000 | +| 31 | blk.3.attn_norm.weight | 0x59667400 | 0x4000 | +| 32 | blk.3.attn_output.weight | 0x5966b400 | 0x1100000 | +| 33 | blk.3.attn_q.weight | 0x5a76b400 | 0x1100000 | +| 34 | blk.3.attn_v.weight | 0x5b86b400 | 0x800000 | +| 35 | blk.3.ffn_down.weight | 0x5c06b400 | 0x2580000 | +| 36 | blk.3.ffn_gate.weight | 0x5e5eb400 | 0x3520000 | +| 37 | blk.3.ffn_norm.weight | 0x61b0b400 | 0x4000 | +| 38 | blk.3.ffn_up.weight | 0x61b0f400 | 0x3520000 | +| 39 | blk.4.attn_k.weight | 0x6502f400 | 0x440000 | +| 40 | blk.4.attn_norm.weight | 0x6546f400 | 0x4000 | +| 41 | blk.4.attn_output.weight | 0x65473400 | 0x1100000 | +| 42 | blk.4.attn_q.weight | 0x66573400 | 0x1100000 | +| 43 | blk.4.attn_v.weight | 0x67673400 | 0x800000 | +| 44 | blk.4.ffn_down.weight | 0x67e73400 | 0x3520000 | +| 45 | blk.4.ffn_gate.weight | 0x6b393400 | 0x3520000 | +| 46 | blk.4.ffn_norm.weight | 0x6e8b3400 | 0x4000 | +| 47 | blk.4.ffn_up.weight | 0x6e8b7400 | 0x3520000 | +| 48 | blk.5.attn_k.weight | 0x71dd7400 | 0x440000 | +| 49 | blk.5.attn_norm.weight | 0x72217400 | 0x4000 | +| 50 | blk.5.attn_output.weight | 0x7221b400 | 0x1100000 | +| 51 | blk.5.attn_q.weight | 0x7331b400 | 0x1100000 | +| 52 | blk.5.attn_v.weight | 0x7441b400 | 0x800000 | +| 53 | blk.5.ffn_down.weight | 0x74c1b400 | 0x2580000 | +| 54 | 
blk.5.ffn_gate.weight | 0x7719b400 | 0x3520000 | +| 55 | blk.5.ffn_norm.weight | 0x7a6bb400 | 0x4000 | +| 56 | blk.5.ffn_up.weight | 0x7a6bf400 | 0x3520000 | +| 57 | blk.6.attn_k.weight | 0x7dbdf400 | 0x440000 | +| 58 | blk.6.attn_norm.weight | 0x7e01f400 | 0x4000 | +| 59 | blk.6.attn_output.weight | 0x7e023400 | 0x1100000 | +| 60 | blk.6.attn_q.weight | 0x7f123400 | 0x1100000 | +| 61 | blk.6.attn_v.weight | 0x80223400 | 0x800000 | +| 62 | blk.6.ffn_down.weight | 0x80a23400 | 0x3520000 | +| 63 | blk.6.ffn_gate.weight | 0x83f43400 | 0x3520000 | +| 64 | blk.6.ffn_norm.weight | 0x87463400 | 0x4000 | +| 65 | blk.6.ffn_up.weight | 0x87467400 | 0x3520000 | +| 66 | blk.7.attn_k.weight | 0x8a987400 | 0x440000 | +| 67 | blk.7.attn_norm.weight | 0x8adc7400 | 0x4000 | +| 68 | blk.7.attn_output.weight | 0x8adcb400 | 0x1100000 | +| 69 | blk.7.attn_q.weight | 0x8becb400 | 0x1100000 | +| 70 | blk.7.attn_v.weight | 0x8cfcb400 | 0x800000 | +| 71 | blk.7.ffn_down.weight | 0x8d7cb400 | 0x3520000 | +| 72 | blk.7.ffn_gate.weight | 0x90ceb400 | 0x3520000 | +| 73 | blk.7.ffn_norm.weight | 0x9420b400 | 0x4000 | +| 74 | blk.7.ffn_up.weight | 0x9420f400 | 0x3520000 | +| 75 | blk.8.attn_k.weight | 0x9772f400 | 0x440000 | +| 76 | blk.8.attn_norm.weight | 0x97b6f400 | 0x4000 | +| 77 | blk.8.attn_output.weight | 0x97b73400 | 0x1100000 | +| 78 | blk.8.attn_q.weight | 0x98c73400 | 0x1100000 | +| 79 | blk.8.attn_v.weight | 0x99d73400 | 0x440000 | +| 80 | blk.8.ffn_down.weight | 0x9a1b3400 | 0x3520000 | +| 81 | blk.8.ffn_gate.weight | 0x9d6d3400 | 0x3520000 | +| 82 | blk.8.ffn_norm.weight | 0xa0bf3400 | 0x4000 | +| 83 | blk.8.ffn_up.weight | 0xa0bf7400 | 0x3520000 | +| 84 | blk.9.attn_k.weight | 0xa4117400 | 0x440000 | +| 85 | blk.9.attn_norm.weight | 0xa4557400 | 0x4000 | +| 86 | blk.9.attn_output.weight | 0xa455b400 | 0x1100000 | +| 87 | blk.9.attn_q.weight | 0xa565b400 | 0x1100000 | +| 88 | blk.9.attn_v.weight | 0xa675b400 | 0x800000 | +| 89 | blk.9.ffn_down.weight | 0xa6f5b400 | 0x3520000 | +| 
90 | blk.9.ffn_gate.weight | 0xaa47b400 | 0x3520000 | +| 91 | blk.9.ffn_norm.weight | 0xad99b400 | 0x4000 | +| 92 | blk.9.ffn_up.weight | 0xad99f400 | 0x3520000 | +| 93 | blk.10.attn_k.weight | 0xb0ebf400 | 0x440000 | +| 94 | blk.10.attn_norm.weight | 0xb12ff400 | 0x4000 | +| 95 | blk.10.attn_output.weight | 0xb1303400 | 0x1100000 | +| 96 | blk.10.attn_q.weight | 0xb2403400 | 0x1100000 | +| 97 | blk.10.attn_v.weight | 0xb3503400 | 0x440000 | +| 98 | blk.10.ffn_down.weight | 0xb3943400 | 0x3520000 | +| 99 | blk.10.ffn_gate.weight | 0xb6e63400 | 0x3520000 | +| 100 | blk.10.ffn_norm.weight | 0xba383400 | 0x4000 | +| 101 | blk.10.ffn_up.weight | 0xba387400 | 0x3520000 | +| 102 | blk.11.attn_k.weight | 0xbd8a7400 | 0x440000 | +| 103 | blk.11.attn_norm.weight | 0xbdce7400 | 0x4000 | +| 104 | blk.11.attn_output.weight | 0xbdceb400 | 0x1100000 | +| 105 | blk.11.attn_q.weight | 0xbedeb400 | 0x1100000 | +| 106 | blk.11.attn_v.weight | 0xbfeeb400 | 0x440000 | +| 107 | blk.11.ffn_down.weight | 0xc032b400 | 0x3520000 | +| 108 | blk.11.ffn_gate.weight | 0xc384b400 | 0x3520000 | +| 109 | blk.11.ffn_norm.weight | 0xc6d6b400 | 0x4000 | +| 110 | blk.11.ffn_up.weight | 0xc6d6f400 | 0x3520000 | +| 111 | blk.12.attn_k.weight | 0xca28f400 | 0x440000 | +| 112 | blk.12.attn_norm.weight | 0xca6cf400 | 0x4000 | +| 113 | blk.12.attn_output.weight | 0xca6d3400 | 0x1100000 | +| 114 | blk.12.attn_q.weight | 0xcb7d3400 | 0x1100000 | +| 115 | blk.12.attn_v.weight | 0xcc8d3400 | 0x440000 | +| 116 | blk.12.ffn_down.weight | 0xccd13400 | 0x3520000 | +| 117 | blk.12.ffn_gate.weight | 0xd0233400 | 0x3520000 | +| 118 | blk.12.ffn_norm.weight | 0xd3753400 | 0x4000 | +| 119 | blk.12.ffn_up.weight | 0xd3757400 | 0x3520000 | +| 120 | blk.13.attn_k.weight | 0xd6c77400 | 0x440000 | +| 121 | blk.13.attn_norm.weight | 0xd70b7400 | 0x4000 | +| 122 | blk.13.attn_output.weight | 0xd70bb400 | 0x1100000 | +| 123 | blk.13.attn_q.weight | 0xd81bb400 | 0x1100000 | +| 124 | blk.13.attn_v.weight | 0xd92bb400 | 0x440000 
| +| 125 | blk.13.ffn_down.weight | 0xd96fb400 | 0x3520000 | +| 126 | blk.13.ffn_gate.weight | 0xdcc1b400 | 0x3520000 | +| 127 | blk.13.ffn_norm.weight | 0xe013b400 | 0x4000 | +| 128 | blk.13.ffn_up.weight | 0xe013f400 | 0x3520000 | +| 129 | blk.14.attn_k.weight | 0xe365f400 | 0x440000 | +| 130 | blk.14.attn_norm.weight | 0xe3a9f400 | 0x4000 | +| 131 | blk.14.attn_output.weight | 0xe3aa3400 | 0x1100000 | +| 132 | blk.14.attn_q.weight | 0xe4ba3400 | 0x1100000 | +| 133 | blk.14.attn_v.weight | 0xe5ca3400 | 0x440000 | +| 134 | blk.14.ffn_down.weight | 0xe60e3400 | 0x3520000 | +| 135 | blk.14.ffn_gate.weight | 0xe9603400 | 0x3520000 | +| 136 | blk.14.ffn_norm.weight | 0xecb23400 | 0x4000 | +| 137 | blk.14.ffn_up.weight | 0xecb27400 | 0x3520000 | +| 138 | blk.15.attn_k.weight | 0xf0047400 | 0x440000 | +| 139 | blk.15.attn_norm.weight | 0xf0487400 | 0x4000 | +| 140 | blk.15.attn_output.weight | 0xf048b400 | 0x1100000 | +| 141 | blk.15.attn_q.weight | 0xf158b400 | 0x1100000 | +| 142 | blk.15.attn_v.weight | 0xf268b400 | 0x800000 | +| 143 | blk.15.ffn_down.weight | 0xf2e8b400 | 0x3520000 | +| 144 | blk.15.ffn_gate.weight | 0xf63ab400 | 0x3520000 | +| 145 | blk.15.ffn_norm.weight | 0xf98cb400 | 0x4000 | +| 146 | blk.15.ffn_up.weight | 0xf98cf400 | 0x3520000 | +| 147 | blk.16.attn_k.weight | 0xfcdef400 | 0x440000 | +| 148 | blk.16.attn_norm.weight | 0xfd22f400 | 0x4000 | +| 149 | blk.16.attn_output.weight | 0xfd233400 | 0x1100000 | +| 150 | blk.16.attn_q.weight | 0xfe333400 | 0x1100000 | +| 151 | blk.16.attn_v.weight | 0xff433400 | 0x800000 | +| 152 | blk.16.ffn_down.weight | 0xffc33400 | 0x3520000 | +| 153 | blk.16.ffn_gate.weight | 0x103153400 | 0x3520000 | +| 154 | blk.16.ffn_norm.weight | 0x106673400 | 0x4000 | +| 155 | blk.16.ffn_up.weight | 0x106677400 | 0x3520000 | +| 156 | blk.17.attn_k.weight | 0x109b97400 | 0x440000 | +| 157 | blk.17.attn_norm.weight | 0x109fd7400 | 0x4000 | +| 158 | blk.17.attn_output.weight | 0x109fdb400 | 0x1100000 | +| 159 | 
blk.17.attn_q.weight | 0x10b0db400 | 0x1100000 | +| 160 | blk.17.attn_v.weight | 0x10c1db400 | 0x440000 | +| 161 | blk.17.ffn_down.weight | 0x10c61b400 | 0x3520000 | +| 162 | blk.17.ffn_gate.weight | 0x10fb3b400 | 0x3520000 | +| 163 | blk.17.ffn_norm.weight | 0x11305b400 | 0x4000 | +| 164 | blk.17.ffn_up.weight | 0x11305f400 | 0x3520000 | +| 165 | blk.18.attn_k.weight | 0x11657f400 | 0x440000 | +| 166 | blk.18.attn_norm.weight | 0x1169bf400 | 0x4000 | +| 167 | blk.18.attn_output.weight | 0x1169c3400 | 0x1100000 | +| 168 | blk.18.attn_q.weight | 0x117ac3400 | 0x1100000 | +| 169 | blk.18.attn_v.weight | 0x118bc3400 | 0x440000 | +| 170 | blk.18.ffn_down.weight | 0x119003400 | 0x3520000 | +| 171 | blk.18.ffn_gate.weight | 0x11c523400 | 0x3520000 | +| 172 | blk.18.ffn_norm.weight | 0x11fa43400 | 0x4000 | +| 173 | blk.18.ffn_up.weight | 0x11fa47400 | 0x3520000 | +| 174 | blk.19.attn_k.weight | 0x122f67400 | 0x440000 | +| 175 | blk.19.attn_norm.weight | 0x1233a7400 | 0x4000 | +| 176 | blk.19.attn_output.weight | 0x1233ab400 | 0x1100000 | +| 177 | blk.19.attn_q.weight | 0x1244ab400 | 0x1100000 | +| 178 | blk.19.attn_v.weight | 0x1255ab400 | 0x440000 | +| 179 | blk.19.ffn_down.weight | 0x1259eb400 | 0x3520000 | +| 180 | blk.19.ffn_gate.weight | 0x128f0b400 | 0x3520000 | +| 181 | blk.19.ffn_norm.weight | 0x12c42b400 | 0x4000 | +| 182 | blk.19.ffn_up.weight | 0x12c42f400 | 0x3520000 | +| 183 | blk.20.attn_k.weight | 0x12f94f400 | 0x440000 | +| 184 | blk.20.attn_norm.weight | 0x12fd8f400 | 0x4000 | +| 185 | blk.20.attn_output.weight | 0x12fd93400 | 0x1100000 | +| 186 | blk.20.attn_q.weight | 0x130e93400 | 0x1100000 | +| 187 | blk.20.attn_v.weight | 0x131f93400 | 0x800000 | +| 188 | blk.20.ffn_down.weight | 0x132793400 | 0x3520000 | +| 189 | blk.20.ffn_gate.weight | 0x135cb3400 | 0x3520000 | +| 190 | blk.20.ffn_norm.weight | 0x1391d3400 | 0x4000 | +| 191 | blk.20.ffn_up.weight | 0x1391d7400 | 0x3520000 | +| 192 | blk.21.attn_k.weight | 0x13c6f7400 | 0x440000 | +| 193 | 
blk.21.attn_norm.weight | 0x13cb37400 | 0x4000 | +| 194 | blk.21.attn_output.weight | 0x13cb3b400 | 0x1100000 | +| 195 | blk.21.attn_q.weight | 0x13dc3b400 | 0x1100000 | +| 196 | blk.21.attn_v.weight | 0x13ed3b400 | 0x440000 | +| 197 | blk.21.ffn_down.weight | 0x13f17b400 | 0x3520000 | +| 198 | blk.21.ffn_gate.weight | 0x14269b400 | 0x3520000 | +| 199 | blk.21.ffn_norm.weight | 0x145bbb400 | 0x4000 | +| 200 | blk.21.ffn_up.weight | 0x145bbf400 | 0x3520000 | +| 201 | blk.22.attn_k.weight | 0x1490df400 | 0x440000 | +| 202 | blk.22.attn_norm.weight | 0x14951f400 | 0x4000 | +| 203 | blk.22.attn_output.weight | 0x149523400 | 0x1100000 | +| 204 | blk.22.attn_q.weight | 0x14a623400 | 0x1100000 | +| 205 | blk.22.attn_v.weight | 0x14b723400 | 0x440000 | +| 206 | blk.22.ffn_down.weight | 0x14bb63400 | 0x3520000 | +| 207 | blk.22.ffn_gate.weight | 0x14f083400 | 0x3520000 | +| 208 | blk.22.ffn_norm.weight | 0x1525a3400 | 0x4000 | +| 209 | blk.22.ffn_up.weight | 0x1525a7400 | 0x3520000 | +| 210 | blk.23.attn_k.weight | 0x155ac7400 | 0x440000 | +| 211 | blk.23.attn_norm.weight | 0x155f07400 | 0x4000 | +| 212 | blk.23.attn_output.weight | 0x155f0b400 | 0x1100000 | +| 213 | blk.23.attn_q.weight | 0x15700b400 | 0x1100000 | +| 214 | blk.23.attn_v.weight | 0x15810b400 | 0x440000 | +| 215 | blk.23.ffn_down.weight | 0x15854b400 | 0x3520000 | +| 216 | blk.23.ffn_gate.weight | 0x15ba6b400 | 0x3520000 | +| 217 | blk.23.ffn_norm.weight | 0x15ef8b400 | 0x4000 | +| 218 | blk.23.ffn_up.weight | 0x15ef8f400 | 0x3520000 | +| 219 | blk.24.attn_k.weight | 0x1624af400 | 0x440000 | +| 220 | blk.24.attn_norm.weight | 0x1628ef400 | 0x4000 | +| 221 | blk.24.attn_output.weight | 0x1628f3400 | 0x1100000 | +| 222 | blk.24.attn_q.weight | 0x1639f3400 | 0x1100000 | +| 223 | blk.24.attn_v.weight | 0x164af3400 | 0x440000 | +| 224 | blk.24.ffn_down.weight | 0x164f33400 | 0x3520000 | +| 225 | blk.24.ffn_gate.weight | 0x168453400 | 0x3520000 | +| 226 | blk.24.ffn_norm.weight | 0x16b973400 | 0x4000 | +| 227 | 
blk.24.ffn_up.weight | 0x16b977400 | 0x3520000 | +| 228 | blk.25.attn_k.weight | 0x16ee97400 | 0x440000 | +| 229 | blk.25.attn_norm.weight | 0x16f2d7400 | 0x4000 | +| 230 | blk.25.attn_output.weight | 0x16f2db400 | 0x1100000 | +| 231 | blk.25.attn_q.weight | 0x1703db400 | 0x1100000 | +| 232 | blk.25.attn_v.weight | 0x1714db400 | 0x800000 | +| 233 | blk.25.ffn_down.weight | 0x171cdb400 | 0x3520000 | +| 234 | blk.25.ffn_gate.weight | 0x1751fb400 | 0x3520000 | +| 235 | blk.25.ffn_norm.weight | 0x17871b400 | 0x4000 | +| 236 | blk.25.ffn_up.weight | 0x17871f400 | 0x3520000 | +| 237 | blk.26.attn_k.weight | 0x17bc3f400 | 0x440000 | +| 238 | blk.26.attn_norm.weight | 0x17c07f400 | 0x4000 | +| 239 | blk.26.attn_output.weight | 0x17c083400 | 0x1100000 | +| 240 | blk.26.attn_q.weight | 0x17d183400 | 0x1100000 | +| 241 | blk.26.attn_v.weight | 0x17e283400 | 0x440000 | +| 242 | blk.26.ffn_down.weight | 0x17e6c3400 | 0x3520000 | +| 243 | blk.26.ffn_gate.weight | 0x181be3400 | 0x3520000 | +| 244 | blk.26.ffn_norm.weight | 0x185103400 | 0x4000 | +| 245 | blk.26.ffn_up.weight | 0x185107400 | 0x3520000 | +| 246 | blk.27.attn_k.weight | 0x188627400 | 0x440000 | +| 247 | blk.27.attn_norm.weight | 0x188a67400 | 0x4000 | +| 248 | blk.27.attn_output.weight | 0x188a6b400 | 0x1100000 | +| 249 | blk.27.attn_q.weight | 0x189b6b400 | 0x1100000 | +| 250 | blk.27.attn_v.weight | 0x18ac6b400 | 0x440000 | +| 251 | blk.27.ffn_down.weight | 0x18b0ab400 | 0x3520000 | +| 252 | blk.27.ffn_gate.weight | 0x18e5cb400 | 0x3520000 | +| 253 | blk.27.ffn_norm.weight | 0x191aeb400 | 0x4000 | +| 254 | blk.27.ffn_up.weight | 0x191aef400 | 0x3520000 | +| 255 | blk.28.attn_k.weight | 0x19500f400 | 0x440000 | +| 256 | blk.28.attn_norm.weight | 0x19544f400 | 0x4000 | +| 257 | blk.28.attn_output.weight | 0x195453400 | 0x1100000 | +| 258 | blk.28.attn_q.weight | 0x196553400 | 0x1100000 | +| 259 | blk.28.attn_v.weight | 0x197653400 | 0x800000 | +| 260 | blk.28.ffn_down.weight | 0x197e53400 | 0x3520000 | +| 261 | 
blk.28.ffn_gate.weight | 0x19b373400 | 0x3520000 | +| 262 | blk.28.ffn_norm.weight | 0x19e893400 | 0x4000 | +| 263 | blk.28.ffn_up.weight | 0x19e897400 | 0x3520000 | +| 264 | blk.29.attn_k.weight | 0x1a1db7400 | 0x440000 | +| 265 | blk.29.attn_norm.weight | 0x1a21f7400 | 0x4000 | +| 266 | blk.29.attn_output.weight | 0x1a21fb400 | 0x1100000 | +| 267 | blk.29.attn_q.weight | 0x1a32fb400 | 0x1100000 | +| 268 | blk.29.attn_v.weight | 0x1a43fb400 | 0x440000 | +| 269 | blk.29.ffn_down.weight | 0x1a483b400 | 0x3520000 | +| 270 | blk.29.ffn_gate.weight | 0x1a7d5b400 | 0x3520000 | +| 271 | blk.29.ffn_norm.weight | 0x1ab27b400 | 0x4000 | +| 272 | blk.29.ffn_up.weight | 0x1ab27f400 | 0x3520000 | +| 273 | blk.30.attn_k.weight | 0x1ae79f400 | 0x440000 | +| 274 | blk.30.attn_norm.weight | 0x1aebdf400 | 0x4000 | +| 275 | blk.30.attn_output.weight | 0x1aebe3400 | 0x1100000 | +| 276 | blk.30.attn_q.weight | 0x1afce3400 | 0x1100000 | +| 277 | blk.30.attn_v.weight | 0x1b0de3400 | 0x800000 | +| 278 | blk.30.ffn_down.weight | 0x1b15e3400 | 0x3520000 | +| 279 | blk.30.ffn_gate.weight | 0x1b4b03400 | 0x3520000 | +| 280 | blk.30.ffn_norm.weight | 0x1b8023400 | 0x4000 | +| 281 | blk.30.ffn_up.weight | 0x1b8027400 | 0x3520000 | +| 282 | blk.31.attn_k.weight | 0x1bb547400 | 0x440000 | +| 283 | blk.31.attn_norm.weight | 0x1bb987400 | 0x4000 | +| 284 | blk.31.attn_output.weight | 0x1bb98b400 | 0x1100000 | +| 285 | blk.31.attn_q.weight | 0x1bca8b400 | 0x1100000 | +| 286 | blk.31.attn_v.weight | 0x1bdb8b400 | 0x440000 | +| 287 | blk.31.ffn_down.weight | 0x1bdfcb400 | 0x3520000 | +| 288 | blk.31.ffn_gate.weight | 0x1c14eb400 | 0x3520000 | +| 289 | blk.31.ffn_norm.weight | 0x1c4a0b400 | 0x4000 | +| 290 | blk.31.ffn_up.weight | 0x1c4a0f400 | 0x3520000 | +| 291 | blk.32.attn_k.weight | 0x1c7f2f400 | 0x440000 | +| 292 | blk.32.attn_norm.weight | 0x1c836f400 | 0x4000 | +| 293 | blk.32.attn_output.weight | 0x1c8373400 | 0x1100000 | +| 294 | blk.32.attn_q.weight | 0x1c9473400 | 0x1100000 | +| 295 | 
blk.32.attn_v.weight | 0x1ca573400 | 0x440000 | +| 296 | blk.32.ffn_down.weight | 0x1ca9b3400 | 0x3520000 | +| 297 | blk.32.ffn_gate.weight | 0x1cded3400 | 0x3520000 | +| 298 | blk.32.ffn_norm.weight | 0x1d13f3400 | 0x4000 | +| 299 | blk.32.ffn_up.weight | 0x1d13f7400 | 0x3520000 | +| 300 | blk.33.attn_k.weight | 0x1d4917400 | 0x440000 | +| 301 | blk.33.attn_norm.weight | 0x1d4d57400 | 0x4000 | +| 302 | blk.33.attn_output.weight | 0x1d4d5b400 | 0x1100000 | +| 303 | blk.33.attn_q.weight | 0x1d5e5b400 | 0x1100000 | +| 304 | blk.33.attn_v.weight | 0x1d6f5b400 | 0x440000 | +| 305 | blk.33.ffn_down.weight | 0x1d739b400 | 0x3520000 | +| 306 | blk.33.ffn_gate.weight | 0x1da8bb400 | 0x3520000 | +| 307 | blk.33.ffn_norm.weight | 0x1ddddb400 | 0x4000 | +| 308 | blk.33.ffn_up.weight | 0x1ddddf400 | 0x3520000 | +| 309 | blk.34.attn_k.weight | 0x1e12ff400 | 0x440000 | +| 310 | blk.34.attn_norm.weight | 0x1e173f400 | 0x4000 | +| 311 | blk.34.attn_output.weight | 0x1e1743400 | 0x1100000 | +| 312 | blk.34.attn_q.weight | 0x1e2843400 | 0x1100000 | +| 313 | blk.34.attn_v.weight | 0x1e3943400 | 0x440000 | +| 314 | blk.34.ffn_down.weight | 0x1e3d83400 | 0x3520000 | +| 315 | blk.34.ffn_gate.weight | 0x1e72a3400 | 0x3520000 | +| 316 | blk.34.ffn_norm.weight | 0x1ea7c3400 | 0x4000 | +| 317 | blk.34.ffn_up.weight | 0x1ea7c7400 | 0x3520000 | +| 318 | blk.35.attn_k.weight | 0x1edce7400 | 0x440000 | +| 319 | blk.35.attn_norm.weight | 0x1ee127400 | 0x4000 | +| 320 | blk.35.attn_output.weight | 0x1ee12b400 | 0x1100000 | +| 321 | blk.35.attn_q.weight | 0x1ef22b400 | 0x1100000 | +| 322 | blk.35.attn_v.weight | 0x1f032b400 | 0x440000 | +| 323 | blk.35.ffn_down.weight | 0x1f076b400 | 0x3520000 | +| 324 | blk.35.ffn_gate.weight | 0x1f3c8b400 | 0x3520000 | +| 325 | blk.35.ffn_norm.weight | 0x1f71ab400 | 0x4000 | +| 326 | blk.35.ffn_up.weight | 0x1f71af400 | 0x3520000 | +| 327 | blk.36.attn_k.weight | 0x1fa6cf400 | 0x440000 | +| 328 | blk.36.attn_norm.weight | 0x1fab0f400 | 0x4000 | +| 329 | 
blk.36.attn_output.weight | 0x1fab13400 | 0x1100000 | +| 330 | blk.36.attn_q.weight | 0x1fbc13400 | 0x1100000 | +| 331 | blk.36.attn_v.weight | 0x1fcd13400 | 0x440000 | +| 332 | blk.36.ffn_down.weight | 0x1fd153400 | 0x3520000 | +| 333 | blk.36.ffn_gate.weight | 0x200673400 | 0x3520000 | +| 334 | blk.36.ffn_norm.weight | 0x203b93400 | 0x4000 | +| 335 | blk.36.ffn_up.weight | 0x203b97400 | 0x3520000 | +| 336 | blk.37.attn_k.weight | 0x2070b7400 | 0x440000 | +| 337 | blk.37.attn_norm.weight | 0x2074f7400 | 0x4000 | +| 338 | blk.37.attn_output.weight | 0x2074fb400 | 0x1100000 | +| 339 | blk.37.attn_q.weight | 0x2085fb400 | 0x1100000 | +| 340 | blk.37.attn_v.weight | 0x2096fb400 | 0x440000 | +| 341 | blk.37.ffn_down.weight | 0x209b3b400 | 0x3520000 | +| 342 | blk.37.ffn_gate.weight | 0x20d05b400 | 0x3520000 | +| 343 | blk.37.ffn_norm.weight | 0x21057b400 | 0x4000 | +| 344 | blk.37.ffn_up.weight | 0x21057f400 | 0x3520000 | +| 345 | blk.38.attn_k.weight | 0x213a9f400 | 0x440000 | +| 346 | blk.38.attn_norm.weight | 0x213edf400 | 0x4000 | +| 347 | blk.38.attn_output.weight | 0x213ee3400 | 0x1100000 | +| 348 | blk.38.attn_q.weight | 0x214fe3400 | 0x1100000 | +| 349 | blk.38.attn_v.weight | 0x2160e3400 | 0x440000 | +| 350 | blk.38.ffn_down.weight | 0x216523400 | 0x3520000 | +| 351 | blk.38.ffn_gate.weight | 0x219a43400 | 0x3520000 | +| 352 | blk.38.ffn_norm.weight | 0x21cf63400 | 0x4000 | +| 353 | blk.38.ffn_up.weight | 0x21cf67400 | 0x3520000 | +| 354 | blk.39.attn_k.weight | 0x220487400 | 0x440000 | +| 355 | blk.39.attn_norm.weight | 0x2208c7400 | 0x4000 | +| 356 | blk.39.attn_output.weight | 0x2208cb400 | 0x1100000 | +| 357 | blk.39.attn_q.weight | 0x2219cb400 | 0x1100000 | +| 358 | blk.39.attn_v.weight | 0x222acb400 | 0x440000 | +| 359 | blk.39.ffn_down.weight | 0x222f0b400 | 0x3520000 | +| 360 | blk.39.ffn_gate.weight | 0x22642b400 | 0x3520000 | +| 361 | blk.39.ffn_norm.weight | 0x22994b400 | 0x4000 | +| 362 | blk.39.ffn_up.weight | 0x22994f400 | 0x3520000 | + +### Base 
Tensor Group : ~822M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------|:---------------------------------|:------------------|:----------------------|:-----|--------:| +| 0 | output.weight | Output (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q8_0 | 8.5000 | +| 1 | output_norm.weight | Output Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 2 | token_embd.weight | Token Embedding (W) | (~411M) 411041792 | 4096 x 100352 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in base: (~822M) 822087680 +- Percentage of total elements: 9.35% +- Bits per Weight (BPW) for base: 8.5001 bits + + +### Block 0 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 3 | blk.0.attn_k.weight | Block 0 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 4 | blk.0.attn_norm.weight | Block 0 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 5 | blk.0.attn_output.weight | Block 0 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 6 | blk.0.attn_q.weight | Block 0 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q6_K | 6.5625 | +| 7 | blk.0.attn_v.weight | Block 0 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 8 | blk.0.ffn_down.weight | Block 0 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 9 | blk.0.ffn_gate.weight | Block 0 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q6_K | 6.5625 | +| 10 | blk.0.ffn_norm.weight | Block 0 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 11 | blk.0.ffn_up.weight | Block 0 
Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.0: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.0: 7.8280 bits + + +### Block 1 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 12 | blk.1.attn_k.weight | Block 1 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 13 | blk.1.attn_norm.weight | Block 1 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 14 | blk.1.attn_output.weight | Block 1 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 15 | blk.1.attn_q.weight | Block 1 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 16 | blk.1.attn_v.weight | Block 1 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 17 | blk.1.ffn_down.weight | Block 1 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 18 | blk.1.ffn_gate.weight | Block 1 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 19 | blk.1.ffn_norm.weight | Block 1 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 20 | blk.1.ffn_up.weight | Block 1 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.1: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.1: 8.5010 bits + + +### Block 2 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 21 | blk.2.attn_k.weight | Block 2 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 22 | blk.2.attn_norm.weight | Block 2 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 23 | blk.2.attn_output.weight | Block 2 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 24 | blk.2.attn_q.weight | Block 2 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 25 | blk.2.attn_v.weight | Block 2 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 26 | blk.2.ffn_down.weight | Block 2 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 27 | blk.2.ffn_gate.weight | Block 2 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 28 | blk.2.ffn_norm.weight | Block 2 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 29 | blk.2.ffn_up.weight | Block 2 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.2: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.2: 8.5010 bits + + +### Block 3 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 30 | blk.3.attn_k.weight | Block 3 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 31 | blk.3.attn_norm.weight | Block 3 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 32 | blk.3.attn_output.weight | Block 3 Attention Output (W) | (~17M) 16777216 | 4096 x 
4096 x 1 x 1 | Q8_0 | 8.5000 | +| 33 | blk.3.attn_q.weight | Block 3 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 34 | blk.3.attn_v.weight | Block 3 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 35 | blk.3.ffn_down.weight | Block 3 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_1 | 6.0000 | +| 36 | blk.3.ffn_gate.weight | Block 3 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 37 | blk.3.ffn_norm.weight | Block 3 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 38 | blk.3.ffn_up.weight | Block 3 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.3: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.3: 8.0010 bits + + +### Block 4 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 39 | blk.4.attn_k.weight | Block 4 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 40 | blk.4.attn_norm.weight | Block 4 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 41 | blk.4.attn_output.weight | Block 4 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 42 | blk.4.attn_q.weight | Block 4 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 43 | blk.4.attn_v.weight | Block 4 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 44 | blk.4.ffn_down.weight | Block 4 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 45 | blk.4.ffn_gate.weight | Block 4 Feed-Forward Network "Gate" (W) | 
(~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 46 | blk.4.ffn_norm.weight | Block 4 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 47 | blk.4.ffn_up.weight | Block 4 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.4: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.4: 8.6589 bits + + +### Block 5 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 48 | blk.5.attn_k.weight | Block 5 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 49 | blk.5.attn_norm.weight | Block 5 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 50 | blk.5.attn_output.weight | Block 5 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 51 | blk.5.attn_q.weight | Block 5 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 52 | blk.5.attn_v.weight | Block 5 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 53 | blk.5.ffn_down.weight | Block 5 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q5_1 | 6.0000 | +| 54 | blk.5.ffn_gate.weight | Block 5 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 55 | blk.5.ffn_norm.weight | Block 5 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 56 | blk.5.ffn_up.weight | Block 5 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.5: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.5: 8.0010 bits + + +### Block 6 
Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 57 | blk.6.attn_k.weight | Block 6 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 58 | blk.6.attn_norm.weight | Block 6 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 59 | blk.6.attn_output.weight | Block 6 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 60 | blk.6.attn_q.weight | Block 6 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 61 | blk.6.attn_v.weight | Block 6 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 62 | blk.6.ffn_down.weight | Block 6 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 63 | blk.6.ffn_gate.weight | Block 6 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 64 | blk.6.ffn_norm.weight | Block 6 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 65 | blk.6.ffn_up.weight | Block 6 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.6: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.6: 8.6589 bits + + +### Block 7 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 66 | blk.7.attn_k.weight | Block 7 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 67 | blk.7.attn_norm.weight | Block 7 Attention Normalization (W) | ( ~4K) 
4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 68 | blk.7.attn_output.weight | Block 7 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 69 | blk.7.attn_q.weight | Block 7 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 70 | blk.7.attn_v.weight | Block 7 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 71 | blk.7.ffn_down.weight | Block 7 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 72 | blk.7.ffn_gate.weight | Block 7 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 73 | blk.7.ffn_norm.weight | Block 7 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 74 | blk.7.ffn_up.weight | Block 7 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.7: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.7: 8.6589 bits + + +### Block 8 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 75 | blk.8.attn_k.weight | Block 8 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 76 | blk.8.attn_norm.weight | Block 8 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 77 | blk.8.attn_output.weight | Block 8 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 78 | blk.8.attn_q.weight | Block 8 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 79 | blk.8.attn_v.weight | Block 8 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 80 | blk.8.ffn_down.weight | Block 8 Feed-Forward Network "Down" (W) 
| (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 81 | blk.8.ffn_gate.weight | Block 8 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 82 | blk.8.ffn_norm.weight | Block 8 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 83 | blk.8.ffn_up.weight | Block 8 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.8: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.8: 8.5010 bits + + +### Block 9 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:-------------------------|:-----------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 84 | blk.9.attn_k.weight | Block 9 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 85 | blk.9.attn_norm.weight | Block 9 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 86 | blk.9.attn_output.weight | Block 9 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 87 | blk.9.attn_q.weight | Block 9 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 88 | blk.9.attn_v.weight | Block 9 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 89 | blk.9.ffn_down.weight | Block 9 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 90 | blk.9.ffn_gate.weight | Block 9 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 91 | blk.9.ffn_norm.weight | Block 9 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 92 | blk.9.ffn_up.weight | Block 9 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements 
in blk.9: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.9: 8.6589 bits + + +### Block 10 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 93 | blk.10.attn_k.weight | Block 10 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 94 | blk.10.attn_norm.weight | Block 10 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 95 | blk.10.attn_output.weight | Block 10 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 96 | blk.10.attn_q.weight | Block 10 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 97 | blk.10.attn_v.weight | Block 10 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 98 | blk.10.ffn_down.weight | Block 10 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 99 | blk.10.ffn_gate.weight | Block 10 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 100 | blk.10.ffn_norm.weight | Block 10 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 101 | blk.10.ffn_up.weight | Block 10 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.10: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.10: 8.5010 bits + + +### Block 11 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 102 | blk.11.attn_k.weight | 
Block 11 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 103 | blk.11.attn_norm.weight | Block 11 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 104 | blk.11.attn_output.weight | Block 11 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 105 | blk.11.attn_q.weight | Block 11 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 106 | blk.11.attn_v.weight | Block 11 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 107 | blk.11.ffn_down.weight | Block 11 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 108 | blk.11.ffn_gate.weight | Block 11 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 109 | blk.11.ffn_norm.weight | Block 11 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 110 | blk.11.ffn_up.weight | Block 11 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.11: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.11: 8.5010 bits + + +### Block 12 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 111 | blk.12.attn_k.weight | Block 12 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 112 | blk.12.attn_norm.weight | Block 12 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 113 | blk.12.attn_output.weight | Block 12 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 114 | blk.12.attn_q.weight | Block 12 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | 
Q8_0 | 8.5000 | +| 115 | blk.12.attn_v.weight | Block 12 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 116 | blk.12.ffn_down.weight | Block 12 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 117 | blk.12.ffn_gate.weight | Block 12 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 118 | blk.12.ffn_norm.weight | Block 12 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 119 | blk.12.ffn_up.weight | Block 12 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.12: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.12: 8.5010 bits + + +### Block 13 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 120 | blk.13.attn_k.weight | Block 13 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 121 | blk.13.attn_norm.weight | Block 13 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 122 | blk.13.attn_output.weight | Block 13 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 123 | blk.13.attn_q.weight | Block 13 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 124 | blk.13.attn_v.weight | Block 13 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 125 | blk.13.ffn_down.weight | Block 13 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 126 | blk.13.ffn_gate.weight | Block 13 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 127 | blk.13.ffn_norm.weight | 
Block 13 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 128 | blk.13.ffn_up.weight | Block 13 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.13: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.13: 8.5010 bits + + +### Block 14 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 129 | blk.14.attn_k.weight | Block 14 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 130 | blk.14.attn_norm.weight | Block 14 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 131 | blk.14.attn_output.weight | Block 14 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 132 | blk.14.attn_q.weight | Block 14 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 133 | blk.14.attn_v.weight | Block 14 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 134 | blk.14.ffn_down.weight | Block 14 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 135 | blk.14.ffn_gate.weight | Block 14 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 136 | blk.14.ffn_norm.weight | Block 14 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 137 | blk.14.ffn_up.weight | Block 14 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.14: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.14: 8.5010 bits + + +### Block 15 Tensor Group : ~199M Elements + +| T_ID | Tensor 
Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 138 | blk.15.attn_k.weight | Block 15 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 139 | blk.15.attn_norm.weight | Block 15 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 140 | blk.15.attn_output.weight | Block 15 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 141 | blk.15.attn_q.weight | Block 15 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 142 | blk.15.attn_v.weight | Block 15 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 143 | blk.15.ffn_down.weight | Block 15 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 144 | blk.15.ffn_gate.weight | Block 15 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 145 | blk.15.ffn_norm.weight | Block 15 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 146 | blk.15.ffn_up.weight | Block 15 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.15: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.15: 8.6589 bits + + +### Block 16 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 147 | blk.16.attn_k.weight | Block 16 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 148 | blk.16.attn_norm.weight | Block 16 Attention Normalization (W) | ( ~4K) 4096 | 4096 
x 1 x 1 x 1 | F32 | 32.0000 | +| 149 | blk.16.attn_output.weight | Block 16 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 150 | blk.16.attn_q.weight | Block 16 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 151 | blk.16.attn_v.weight | Block 16 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 152 | blk.16.ffn_down.weight | Block 16 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 153 | blk.16.ffn_gate.weight | Block 16 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 154 | blk.16.ffn_norm.weight | Block 16 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 155 | blk.16.ffn_up.weight | Block 16 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.16: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.16: 8.6589 bits + + +### Block 17 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 156 | blk.17.attn_k.weight | Block 17 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 157 | blk.17.attn_norm.weight | Block 17 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 158 | blk.17.attn_output.weight | Block 17 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 159 | blk.17.attn_q.weight | Block 17 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 160 | blk.17.attn_v.weight | Block 17 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 161 | blk.17.ffn_down.weight | Block 17 
Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 162 | blk.17.ffn_gate.weight | Block 17 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 163 | blk.17.ffn_norm.weight | Block 17 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 164 | blk.17.ffn_up.weight | Block 17 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.17: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.17: 8.5010 bits + + +### Block 18 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 165 | blk.18.attn_k.weight | Block 18 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 166 | blk.18.attn_norm.weight | Block 18 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 167 | blk.18.attn_output.weight | Block 18 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 168 | blk.18.attn_q.weight | Block 18 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 169 | blk.18.attn_v.weight | Block 18 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 170 | blk.18.ffn_down.weight | Block 18 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 171 | blk.18.ffn_gate.weight | Block 18 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 172 | blk.18.ffn_norm.weight | Block 18 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 173 | blk.18.ffn_up.weight | Block 18 Feed-Forward Network "Up" (W) | 
(~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.18: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.18: 8.5010 bits + + +### Block 19 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 174 | blk.19.attn_k.weight | Block 19 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 175 | blk.19.attn_norm.weight | Block 19 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 176 | blk.19.attn_output.weight | Block 19 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 177 | blk.19.attn_q.weight | Block 19 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 178 | blk.19.attn_v.weight | Block 19 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 179 | blk.19.ffn_down.weight | Block 19 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 180 | blk.19.ffn_gate.weight | Block 19 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 181 | blk.19.ffn_norm.weight | Block 19 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 182 | blk.19.ffn_up.weight | Block 19 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.19: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.19: 8.5010 bits + + +### Block 20 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 183 | blk.20.attn_k.weight | Block 20 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 184 | blk.20.attn_norm.weight | Block 20 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 185 | blk.20.attn_output.weight | Block 20 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 186 | blk.20.attn_q.weight | Block 20 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 187 | blk.20.attn_v.weight | Block 20 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 188 | blk.20.ffn_down.weight | Block 20 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 189 | blk.20.ffn_gate.weight | Block 20 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 190 | blk.20.ffn_norm.weight | Block 20 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 191 | blk.20.ffn_up.weight | Block 20 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.20: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.20: 8.6589 bits + + +### Block 21 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 192 | blk.21.attn_k.weight | Block 21 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 193 | blk.21.attn_norm.weight | Block 21 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 194 | blk.21.attn_output.weight | Block 21 
Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 195 | blk.21.attn_q.weight | Block 21 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 196 | blk.21.attn_v.weight | Block 21 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 197 | blk.21.ffn_down.weight | Block 21 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 198 | blk.21.ffn_gate.weight | Block 21 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 199 | blk.21.ffn_norm.weight | Block 21 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 200 | blk.21.ffn_up.weight | Block 21 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.21: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.21: 8.5010 bits + + +### Block 22 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 201 | blk.22.attn_k.weight | Block 22 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 202 | blk.22.attn_norm.weight | Block 22 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 203 | blk.22.attn_output.weight | Block 22 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 204 | blk.22.attn_q.weight | Block 22 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 205 | blk.22.attn_v.weight | Block 22 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 206 | blk.22.ffn_down.weight | Block 22 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | 
Q8_0 | 8.5000 | +| 207 | blk.22.ffn_gate.weight | Block 22 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 208 | blk.22.ffn_norm.weight | Block 22 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 209 | blk.22.ffn_up.weight | Block 22 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.22: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.22: 8.5010 bits + + +### Block 23 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 210 | blk.23.attn_k.weight | Block 23 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 211 | blk.23.attn_norm.weight | Block 23 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 212 | blk.23.attn_output.weight | Block 23 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 213 | blk.23.attn_q.weight | Block 23 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 214 | blk.23.attn_v.weight | Block 23 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 215 | blk.23.ffn_down.weight | Block 23 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 216 | blk.23.ffn_gate.weight | Block 23 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 217 | blk.23.ffn_norm.weight | Block 23 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 218 | blk.23.ffn_up.weight | Block 23 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in 
blk.23: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.23: 8.5010 bits + + +### Block 24 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 219 | blk.24.attn_k.weight | Block 24 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 220 | blk.24.attn_norm.weight | Block 24 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 221 | blk.24.attn_output.weight | Block 24 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 222 | blk.24.attn_q.weight | Block 24 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 223 | blk.24.attn_v.weight | Block 24 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 224 | blk.24.ffn_down.weight | Block 24 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 225 | blk.24.ffn_gate.weight | Block 24 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 226 | blk.24.ffn_norm.weight | Block 24 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 227 | blk.24.ffn_up.weight | Block 24 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.24: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.24: 8.5010 bits + + +### Block 25 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 228 | 
blk.25.attn_k.weight | Block 25 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 229 | blk.25.attn_norm.weight | Block 25 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 230 | blk.25.attn_output.weight | Block 25 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 231 | blk.25.attn_q.weight | Block 25 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 232 | blk.25.attn_v.weight | Block 25 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 233 | blk.25.ffn_down.weight | Block 25 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 234 | blk.25.ffn_gate.weight | Block 25 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 235 | blk.25.ffn_norm.weight | Block 25 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 236 | blk.25.ffn_up.weight | Block 25 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.25: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.25: 8.6589 bits + + +### Block 26 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 237 | blk.26.attn_k.weight | Block 26 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 238 | blk.26.attn_norm.weight | Block 26 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 239 | blk.26.attn_output.weight | Block 26 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 240 | blk.26.attn_q.weight | Block 26 Attention Query (W) | (~17M) 16777216 | 
4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 241 | blk.26.attn_v.weight | Block 26 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 242 | blk.26.ffn_down.weight | Block 26 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 243 | blk.26.ffn_gate.weight | Block 26 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 244 | blk.26.ffn_norm.weight | Block 26 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 245 | blk.26.ffn_up.weight | Block 26 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.26: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.26: 8.5010 bits + + +### Block 27 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 246 | blk.27.attn_k.weight | Block 27 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 247 | blk.27.attn_norm.weight | Block 27 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 248 | blk.27.attn_output.weight | Block 27 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 249 | blk.27.attn_q.weight | Block 27 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 250 | blk.27.attn_v.weight | Block 27 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 251 | blk.27.ffn_down.weight | Block 27 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 252 | blk.27.ffn_gate.weight | Block 27 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 253 | 
blk.27.ffn_norm.weight | Block 27 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 254 | blk.27.ffn_up.weight | Block 27 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.27: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.27: 8.5010 bits + + +### Block 28 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 255 | blk.28.attn_k.weight | Block 28 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 256 | blk.28.attn_norm.weight | Block 28 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 257 | blk.28.attn_output.weight | Block 28 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 258 | blk.28.attn_q.weight | Block 28 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 259 | blk.28.attn_v.weight | Block 28 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 260 | blk.28.ffn_down.weight | Block 28 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 261 | blk.28.ffn_gate.weight | Block 28 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 262 | blk.28.ffn_norm.weight | Block 28 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 263 | blk.28.ffn_up.weight | Block 28 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.28: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.28: 8.6589 bits + + +### Block 29 Tensor Group : ~199M 
Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 264 | blk.29.attn_k.weight | Block 29 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 265 | blk.29.attn_norm.weight | Block 29 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 266 | blk.29.attn_output.weight | Block 29 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 267 | blk.29.attn_q.weight | Block 29 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 268 | blk.29.attn_v.weight | Block 29 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 269 | blk.29.ffn_down.weight | Block 29 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 270 | blk.29.ffn_gate.weight | Block 29 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 271 | blk.29.ffn_norm.weight | Block 29 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 272 | blk.29.ffn_up.weight | Block 29 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.29: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.29: 8.5010 bits + + +### Block 30 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 273 | blk.30.attn_k.weight | Block 30 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 274 | blk.30.attn_norm.weight | Block 30 Attention 
Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 275 | blk.30.attn_output.weight | Block 30 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 276 | blk.30.attn_q.weight | Block 30 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 277 | blk.30.attn_v.weight | Block 30 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | BF16 | 16.0000 | +| 278 | blk.30.ffn_down.weight | Block 30 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 279 | blk.30.ffn_gate.weight | Block 30 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 280 | blk.30.ffn_norm.weight | Block 30 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 281 | blk.30.ffn_up.weight | Block 30 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.30: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.30: 8.6589 bits + + +### Block 31 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 282 | blk.31.attn_k.weight | Block 31 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 283 | blk.31.attn_norm.weight | Block 31 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 284 | blk.31.attn_output.weight | Block 31 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 285 | blk.31.attn_q.weight | Block 31 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 286 | blk.31.attn_v.weight | Block 31 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 
287 | blk.31.ffn_down.weight | Block 31 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 288 | blk.31.ffn_gate.weight | Block 31 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 289 | blk.31.ffn_norm.weight | Block 31 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 290 | blk.31.ffn_up.weight | Block 31 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.31: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.31: 8.5010 bits + + +### Block 32 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 291 | blk.32.attn_k.weight | Block 32 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 292 | blk.32.attn_norm.weight | Block 32 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 293 | blk.32.attn_output.weight | Block 32 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 294 | blk.32.attn_q.weight | Block 32 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 295 | blk.32.attn_v.weight | Block 32 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 296 | blk.32.ffn_down.weight | Block 32 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 297 | blk.32.ffn_gate.weight | Block 32 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 298 | blk.32.ffn_norm.weight | Block 32 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 299 | blk.32.ffn_up.weight | Block 
32 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.32: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.32: 8.5010 bits + + +### Block 33 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 300 | blk.33.attn_k.weight | Block 33 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 301 | blk.33.attn_norm.weight | Block 33 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 302 | blk.33.attn_output.weight | Block 33 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 303 | blk.33.attn_q.weight | Block 33 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 304 | blk.33.attn_v.weight | Block 33 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 305 | blk.33.ffn_down.weight | Block 33 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 306 | blk.33.ffn_gate.weight | Block 33 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 307 | blk.33.ffn_norm.weight | Block 33 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 308 | blk.33.ffn_up.weight | Block 33 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.33: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.33: 8.5010 bits + + +### Block 34 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | 
+|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 309 | blk.34.attn_k.weight | Block 34 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 310 | blk.34.attn_norm.weight | Block 34 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 311 | blk.34.attn_output.weight | Block 34 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 312 | blk.34.attn_q.weight | Block 34 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 313 | blk.34.attn_v.weight | Block 34 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 314 | blk.34.ffn_down.weight | Block 34 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 315 | blk.34.ffn_gate.weight | Block 34 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 316 | blk.34.ffn_norm.weight | Block 34 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 317 | blk.34.ffn_up.weight | Block 34 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.34: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.34: 8.5010 bits + + +### Block 35 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 318 | blk.35.attn_k.weight | Block 35 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 319 | blk.35.attn_norm.weight | Block 35 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 320 | blk.35.attn_output.weight | Block 35 
Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 321 | blk.35.attn_q.weight | Block 35 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 322 | blk.35.attn_v.weight | Block 35 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 323 | blk.35.ffn_down.weight | Block 35 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 324 | blk.35.ffn_gate.weight | Block 35 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 325 | blk.35.ffn_norm.weight | Block 35 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 326 | blk.35.ffn_up.weight | Block 35 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.35: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.35: 8.5010 bits + + +### Block 36 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 327 | blk.36.attn_k.weight | Block 36 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 328 | blk.36.attn_norm.weight | Block 36 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 329 | blk.36.attn_output.weight | Block 36 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 330 | blk.36.attn_q.weight | Block 36 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 331 | blk.36.attn_v.weight | Block 36 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 332 | blk.36.ffn_down.weight | Block 36 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | 
Q8_0 | 8.5000 | +| 333 | blk.36.ffn_gate.weight | Block 36 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 334 | blk.36.ffn_norm.weight | Block 36 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 335 | blk.36.ffn_up.weight | Block 36 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.36: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.36: 8.5010 bits + + +### Block 37 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 336 | blk.37.attn_k.weight | Block 37 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 337 | blk.37.attn_norm.weight | Block 37 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 338 | blk.37.attn_output.weight | Block 37 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 339 | blk.37.attn_q.weight | Block 37 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 340 | blk.37.attn_v.weight | Block 37 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 341 | blk.37.ffn_down.weight | Block 37 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 342 | blk.37.ffn_gate.weight | Block 37 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 343 | blk.37.ffn_norm.weight | Block 37 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 344 | blk.37.ffn_up.weight | Block 37 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in 
blk.37: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.37: 8.5010 bits + + +### Block 38 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 345 | blk.38.attn_k.weight | Block 38 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 346 | blk.38.attn_norm.weight | Block 38 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 347 | blk.38.attn_output.weight | Block 38 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 348 | blk.38.attn_q.weight | Block 38 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 349 | blk.38.attn_v.weight | Block 38 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 350 | blk.38.ffn_down.weight | Block 38 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 351 | blk.38.ffn_gate.weight | Block 38 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 352 | blk.38.ffn_norm.weight | Block 38 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 353 | blk.38.ffn_up.weight | Block 38 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.38: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.38: 8.5010 bits + + +### Block 39 Tensor Group : ~199M Elements + +| T_ID | Tensor Layer Name | Human Friendly Tensor Layer Name | Elements | Shape | Type | BPW | +|-----:|:--------------------------|:------------------------------------------------|:----------------|:----------------------|:-----|--------:| +| 354 | 
blk.39.attn_k.weight | Block 39 Attention Key (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 355 | blk.39.attn_norm.weight | Block 39 Attention Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 356 | blk.39.attn_output.weight | Block 39 Attention Output (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 357 | blk.39.attn_q.weight | Block 39 Attention Query (W) | (~17M) 16777216 | 4096 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 358 | blk.39.attn_v.weight | Block 39 Attention Value (W) | ( ~4M) 4194304 | 4096 x 1024 x 1 x 1 | Q8_0 | 8.5000 | +| 359 | blk.39.ffn_down.weight | Block 39 Feed-Forward Network "Down" (W) | (~52M) 52428800 | 12800 x 4096 x 1 x 1 | Q8_0 | 8.5000 | +| 360 | blk.39.ffn_gate.weight | Block 39 Feed-Forward Network "Gate" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | +| 361 | blk.39.ffn_norm.weight | Block 39 Feed-Forward Network Normalization (W) | ( ~4K) 4096 | 4096 x 1 x 1 x 1 | F32 | 32.0000 | +| 362 | blk.39.ffn_up.weight | Block 39 Feed-Forward Network "Up" (W) | (~52M) 52428800 | 4096 x 12800 x 1 x 1 | Q8_0 | 8.5000 | + +- Total elements in blk.39: (~199M) 199237632 +- Percentage of total elements: 2.27% +- Bits per Weight (BPW) for blk.39: 8.5010 bits + + +Total BPW for granite-4.1-8b-Q8_0.gguf: 8.4988 bits diff --git a/scores/granite-4.1-8b-q1_l.arc b/scores/granite-4.1-8b-q1_l.arc new file mode 100644 index 0000000..d8bcaaa --- /dev/null +++ b/scores/granite-4.1-8b-q1_l.arc @@ -0,0 +1,18 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 2 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 22 tensors +llama_model_loader: - type iq1_s: 209 tensors +llama_model_loader: - type iq2_s: 17 tensors +llama_model_loader: - type iq1_m: 31 tensors +print_info: file format = GGUF V3 
(latest) +print_info: file type = IQ1_M - 1.75 bpw +print_info: file size = 1.79 GiB (1.75 BPW) + +multiple_choice_score: there are 869 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 869 tasks available +multiple_choice_score : calculating ARC Challenge score over 750 tasks. + +Final result: 36.5333 +/- 1.7594 +Random chance: 25.0083 +/- 1.5824 diff --git a/scores/granite-4.1-8b-q1_l.gpqa b/scores/granite-4.1-8b-q1_l.gpqa new file mode 100644 index 0000000..bb8000c --- /dev/null +++ b/scores/granite-4.1-8b-q1_l.gpqa @@ -0,0 +1,18 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 2 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 22 tensors +llama_model_loader: - type iq1_s: 209 tensors +llama_model_loader: - type iq2_s: 17 tensors +llama_model_loader: - type iq1_m: 31 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ1_M - 1.75 bpw +print_info: file size = 1.79 GiB (1.75 BPW) + +multiple_choice_score: there are 198 tasks in prompt +multiple_choice_score: reading tasks......................................................................................................................................................................................................done +multiple_choice_score : calculating GPQA-Diamond score over 198 tasks. 
+ +Final result: 19.1919 +/- 2.8058 +Random chance: 24.5963 +/- 3.0683 diff --git a/scores/granite-4.1-8b-q1_l.hsw b/scores/granite-4.1-8b-q1_l.hsw new file mode 100644 index 0000000..7ea7641 --- /dev/null +++ b/scores/granite-4.1-8b-q1_l.hsw @@ -0,0 +1,17 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 2 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 22 tensors +llama_model_loader: - type iq1_s: 209 tensors +llama_model_loader: - type iq2_s: 17 tensors +llama_model_loader: - type iq1_m: 31 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ1_M - 1.75 bpw +print_info: file size = 1.79 GiB (1.75 BPW) + +hellaswag_score : loaded 10042 tasks from prompt. +hellaswag_score : selecting 750 randomized tasks. +hellaswag_score : calculating hellaswag score over selected tasks. 
+ +750 36.00000000% [32.6441%, 39.4986%] diff --git a/scores/granite-4.1-8b-q1_l.mmlu b/scores/granite-4.1-8b-q1_l.mmlu new file mode 100644 index 0000000..3ce598f --- /dev/null +++ b/scores/granite-4.1-8b-q1_l.mmlu @@ -0,0 +1,18 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 2 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 22 tensors +llama_model_loader: - type iq1_s: 209 tensors +llama_model_loader: - type iq2_s: 17 tensors +llama_model_loader: - type iq1_m: 31 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ1_M - 1.75 bpw +print_info: file size = 1.79 GiB (1.75 BPW) + +multiple_choice_score: there are 5362 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 5362 tasks available +multiple_choice_score : calculating MMLU-Redux score over 750 tasks. 
+ +Final result: 27.2000 +/- 1.6260 +Random chance: 25.0000 +/- 1.5822 diff --git a/scores/granite-4.1-8b-q1_l.ppx b/scores/granite-4.1-8b-q1_l.ppx new file mode 100644 index 0000000..31fb986 --- /dev/null +++ b/scores/granite-4.1-8b-q1_l.ppx @@ -0,0 +1,52 @@ +Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264 +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 2 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 22 tensors +llama_model_loader: - type iq1_s: 209 tensors +llama_model_loader: - type iq2_s: 17 tensors +llama_model_loader: - type iq1_m: 31 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ1_M - 1.75 bpw +print_info: file size = 1.79 GiB (1.75 BPW) + +====== Perplexity statistics ====== +Mean PPL(Q) : 87.318832 ± 0.781580 +Mean PPL(base) : 8.691178 ± 0.065443 +Cor(ln(PPL(Q)), ln(PPL(base))): 57.61% +Mean ln(PPL(Q)/PPL(base)) : 2.307258 ± 0.007692 +Mean PPL(Q)/PPL(base) : 10.046835 ± 0.077278 +Mean PPL(Q)-PPL(base) : 78.627654 ± 0.745801 + +====== KL divergence statistics ====== +Mean KLD: 2.889523 ± 0.005948 +Maximum KLD: 18.836405 +99.9% KLD: 13.080500 +99.0% KLD: 10.150911 +95.0% KLD: 7.400433 +90.0% KLD: 6.028920 +Median KLD: 2.342337 +10.0% KLD: 0.519454 + 5.0% KLD: 0.230382 + 1.0% KLD: 0.035005 + 0.1% KLD: 0.005322 +Minimum KLD: 0.000491 + +====== Token probability statistics ====== +Mean Δp: -31.301 ± 0.094 % +Maximum Δp: 92.012% +99.9% Δp: 58.470% +99.0% Δp: 27.386% +95.0% Δp: 4.750% +90.0% Δp: 0.243% +75.0% Δp: -0.866% +Median Δp: -17.956% +25.0% Δp: -60.731% +10.0% Δp: -90.577% + 5.0% Δp: -97.636% + 1.0% Δp: -99.874% + 0.1% Δp: -99.991% +Minimum Δp: -100.000% +RMS Δp : 47.467 ± 0.088 % +Same top p: 34.309 ± 0.125 % diff --git a/scores/granite-4.1-8b-q1_l.tqa 
b/scores/granite-4.1-8b-q1_l.tqa new file mode 100644 index 0000000..692bdcf --- /dev/null +++ b/scores/granite-4.1-8b-q1_l.tqa @@ -0,0 +1,18 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 2 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 22 tensors +llama_model_loader: - type iq1_s: 209 tensors +llama_model_loader: - type iq2_s: 17 tensors +llama_model_loader: - type iq1_m: 31 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ1_M - 1.75 bpw +print_info: file size = 1.79 GiB (1.75 BPW) + +multiple_choice_score: there are 817 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 817 tasks available +multiple_choice_score : calculating TruthfulQA score over 750 tasks. + +Final result: 28.9333 +/- 1.6569 +Random chance: 19.8992 +/- 1.4588 diff --git a/scores/granite-4.1-8b-q1_l.wng b/scores/granite-4.1-8b-q1_l.wng new file mode 100644 index 0000000..a8befd7 --- /dev/null +++ b/scores/granite-4.1-8b-q1_l.wng @@ -0,0 +1,16 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q1_L.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 2 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 22 tensors +llama_model_loader: - type iq1_s: 209 tensors +llama_model_loader: - type iq2_s: 17 tensors +llama_model_loader: - type iq1_m: 31 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ1_M - 1.75 bpw +print_info: file size = 1.79 GiB (1.75 BPW) + +winogrande_score : loaded 1266 tasks from prompt. 
+winogrande_score : selecting 750 random tasks + +Final Winogrande score(750 tasks): 52.5333 +/- 1.8246 diff --git a/scores/granite-4.1-8b-q2_k.arc b/scores/granite-4.1-8b-q2_k.arc new file mode 100644 index 0000000..68018ef --- /dev/null +++ b/scores/granite-4.1-8b-q2_k.arc @@ -0,0 +1,21 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 35 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type iq2_xxs: 41 tensors +llama_model_loader: - type iq2_xs: 56 tensors +llama_model_loader: - type iq3_xxs: 6 tensors +llama_model_loader: - type iq1_s: 2 tensors +llama_model_loader: - type iq3_s: 38 tensors +llama_model_loader: - type iq2_s: 99 tensors +llama_model_loader: - type iq1_m: 4 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ2_S - 2.5 bpw +print_info: file size = 2.56 GiB (2.50 BPW) + +multiple_choice_score: there are 869 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 869 tasks available +multiple_choice_score : calculating ARC Challenge score over 750 tasks. 
+ +Final result: 60.4000 +/- 1.7870 +Random chance: 25.0083 +/- 1.5824 diff --git a/scores/granite-4.1-8b-q2_k.gpqa b/scores/granite-4.1-8b-q2_k.gpqa new file mode 100644 index 0000000..f246d97 --- /dev/null +++ b/scores/granite-4.1-8b-q2_k.gpqa @@ -0,0 +1,21 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 35 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type iq2_xxs: 41 tensors +llama_model_loader: - type iq2_xs: 56 tensors +llama_model_loader: - type iq3_xxs: 6 tensors +llama_model_loader: - type iq1_s: 2 tensors +llama_model_loader: - type iq3_s: 38 tensors +llama_model_loader: - type iq2_s: 99 tensors +llama_model_loader: - type iq1_m: 4 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ2_S - 2.5 bpw +print_info: file size = 2.56 GiB (2.50 BPW) + +multiple_choice_score: there are 198 tasks in prompt +multiple_choice_score: reading tasks......................................................................................................................................................................................................done +multiple_choice_score : calculating GPQA-Diamond score over 198 tasks. 
+ +Final result: 29.7980 +/- 3.2586 +Random chance: 24.5963 +/- 3.0683 diff --git a/scores/granite-4.1-8b-q2_k.hsw b/scores/granite-4.1-8b-q2_k.hsw new file mode 100644 index 0000000..27eb0b9 --- /dev/null +++ b/scores/granite-4.1-8b-q2_k.hsw @@ -0,0 +1,20 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 35 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type iq2_xxs: 41 tensors +llama_model_loader: - type iq2_xs: 56 tensors +llama_model_loader: - type iq3_xxs: 6 tensors +llama_model_loader: - type iq1_s: 2 tensors +llama_model_loader: - type iq3_s: 38 tensors +llama_model_loader: - type iq2_s: 99 tensors +llama_model_loader: - type iq1_m: 4 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ2_S - 2.5 bpw +print_info: file size = 2.56 GiB (2.50 BPW) + +hellaswag_score : loaded 10042 tasks from prompt. +hellaswag_score : selecting 750 randomized tasks. +hellaswag_score : calculating hellaswag score over selected tasks. 
+ +750 70.00000000% [66.6252%, 73.1710%] diff --git a/scores/granite-4.1-8b-q2_k.mmlu b/scores/granite-4.1-8b-q2_k.mmlu new file mode 100644 index 0000000..04b180c --- /dev/null +++ b/scores/granite-4.1-8b-q2_k.mmlu @@ -0,0 +1,21 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 35 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type iq2_xxs: 41 tensors +llama_model_loader: - type iq2_xs: 56 tensors +llama_model_loader: - type iq3_xxs: 6 tensors +llama_model_loader: - type iq1_s: 2 tensors +llama_model_loader: - type iq3_s: 38 tensors +llama_model_loader: - type iq2_s: 99 tensors +llama_model_loader: - type iq1_m: 4 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ2_S - 2.5 bpw +print_info: file size = 2.56 GiB (2.50 BPW) + +multiple_choice_score: there are 5362 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 5362 tasks available +multiple_choice_score : calculating MMLU-Redux score over 750 tasks. 
+ +Final result: 59.2000 +/- 1.7958 +Random chance: 25.0000 +/- 1.5822 diff --git a/scores/granite-4.1-8b-q2_k.ppx b/scores/granite-4.1-8b-q2_k.ppx new file mode 100644 index 0000000..1dda0b3 --- /dev/null +++ b/scores/granite-4.1-8b-q2_k.ppx @@ -0,0 +1,55 @@ +Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264 +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 35 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type iq2_xxs: 41 tensors +llama_model_loader: - type iq2_xs: 56 tensors +llama_model_loader: - type iq3_xxs: 6 tensors +llama_model_loader: - type iq1_s: 2 tensors +llama_model_loader: - type iq3_s: 38 tensors +llama_model_loader: - type iq2_s: 99 tensors +llama_model_loader: - type iq1_m: 4 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ2_S - 2.5 bpw +print_info: file size = 2.56 GiB (2.50 BPW) + +====== Perplexity statistics ====== +Mean PPL(Q) : 12.534216 ± 0.095606 +Mean PPL(base) : 8.691178 ± 0.065443 +Cor(ln(PPL(Q)), ln(PPL(base))): 86.12% +Mean ln(PPL(Q)/PPL(base)) : 0.366154 ± 0.003993 +Mean PPL(Q)/PPL(base) : 1.442177 ± 0.005759 +Mean PPL(Q)-PPL(base) : 3.843038 ± 0.051440 + +====== KL divergence statistics ====== +Mean KLD: 0.644965 ± 0.002755 +Maximum KLD: 18.493736 +99.9% KLD: 10.059598 +99.0% KLD: 5.407475 +95.0% KLD: 2.364230 +90.0% KLD: 1.477545 +Median KLD: 0.339213 +10.0% KLD: 0.010624 + 5.0% KLD: 0.002467 + 1.0% KLD: 0.000277 + 0.1% KLD: 0.000030 +Minimum KLD: -0.000000 + +====== Token probability statistics ====== +Mean Δp: -7.810 ± 0.058 % +Maximum Δp: 99.884% +99.9% Δp: 73.625% +99.0% Δp: 40.369% +95.0% Δp: 16.588% +90.0% Δp: 7.372% +75.0% Δp: 0.201% +Median Δp: -0.916% +25.0% Δp: -11.370% +10.0% Δp: -33.838% + 5.0% Δp: -54.602% + 1.0% Δp: -93.772% + 0.1% Δp: -99.844% +Minimum Δp: 
-99.999% +RMS Δp : 23.380 ± 0.085 % +Same top p: 67.231 ± 0.124 % diff --git a/scores/granite-4.1-8b-q2_k.tqa b/scores/granite-4.1-8b-q2_k.tqa new file mode 100644 index 0000000..347a1e7 --- /dev/null +++ b/scores/granite-4.1-8b-q2_k.tqa @@ -0,0 +1,21 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 35 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type iq2_xxs: 41 tensors +llama_model_loader: - type iq2_xs: 56 tensors +llama_model_loader: - type iq3_xxs: 6 tensors +llama_model_loader: - type iq1_s: 2 tensors +llama_model_loader: - type iq3_s: 38 tensors +llama_model_loader: - type iq2_s: 99 tensors +llama_model_loader: - type iq1_m: 4 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ2_S - 2.5 bpw +print_info: file size = 2.56 GiB (2.50 BPW) + +multiple_choice_score: there are 817 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 817 tasks available +multiple_choice_score : calculating TruthfulQA score over 750 tasks. 
+ +Final result: 33.4667 +/- 1.7242 +Random chance: 19.8992 +/- 1.4588 diff --git a/scores/granite-4.1-8b-q2_k.wng b/scores/granite-4.1-8b-q2_k.wng new file mode 100644 index 0000000..a1d61b3 --- /dev/null +++ b/scores/granite-4.1-8b-q2_k.wng @@ -0,0 +1,19 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q2_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 35 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type iq2_xxs: 41 tensors +llama_model_loader: - type iq2_xs: 56 tensors +llama_model_loader: - type iq3_xxs: 6 tensors +llama_model_loader: - type iq1_s: 2 tensors +llama_model_loader: - type iq3_s: 38 tensors +llama_model_loader: - type iq2_s: 99 tensors +llama_model_loader: - type iq1_m: 4 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ2_S - 2.5 bpw +print_info: file size = 2.56 GiB (2.50 BPW) + +winogrande_score : loaded 1266 tasks from prompt. 
+winogrande_score : selecting 750 random tasks + +Final Winogrande score(750 tasks): 65.2000 +/- 1.7405 diff --git a/scores/granite-4.1-8b-q3_k.arc b/scores/granite-4.1-8b-q3_k.arc new file mode 100644 index 0000000..3cead5f --- /dev/null +++ b/scores/granite-4.1-8b-q3_k.arc @@ -0,0 +1,20 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 1 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type q4_K: 33 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 2 tensors +llama_model_loader: - type iq3_xxs: 31 tensors +llama_model_loader: - type iq3_s: 165 tensors +llama_model_loader: - type iq4_xs: 48 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ4_XS - 4.25 bpw +print_info: file size = 3.58 GiB (3.50 BPW) + +multiple_choice_score: there are 869 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 869 tasks available +multiple_choice_score : calculating ARC Challenge score over 750 tasks. 
+ +Final result: 62.0000 +/- 1.7736 +Random chance: 25.0083 +/- 1.5824 diff --git a/scores/granite-4.1-8b-q3_k.gpqa b/scores/granite-4.1-8b-q3_k.gpqa new file mode 100644 index 0000000..eba00fe --- /dev/null +++ b/scores/granite-4.1-8b-q3_k.gpqa @@ -0,0 +1,20 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 1 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type q4_K: 33 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 2 tensors +llama_model_loader: - type iq3_xxs: 31 tensors +llama_model_loader: - type iq3_s: 165 tensors +llama_model_loader: - type iq4_xs: 48 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ4_XS - 4.25 bpw +print_info: file size = 3.58 GiB (3.50 BPW) + +multiple_choice_score: there are 198 tasks in prompt +multiple_choice_score: reading tasks......................................................................................................................................................................................................done +multiple_choice_score : calculating GPQA-Diamond score over 198 tasks. 
+ +Final result: 21.7172 +/- 2.9377 +Random chance: 24.5963 +/- 3.0683 diff --git a/scores/granite-4.1-8b-q3_k.hsw b/scores/granite-4.1-8b-q3_k.hsw new file mode 100644 index 0000000..833c97a --- /dev/null +++ b/scores/granite-4.1-8b-q3_k.hsw @@ -0,0 +1,19 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 1 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type q4_K: 33 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 2 tensors +llama_model_loader: - type iq3_xxs: 31 tensors +llama_model_loader: - type iq3_s: 165 tensors +llama_model_loader: - type iq4_xs: 48 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ4_XS - 4.25 bpw +print_info: file size = 3.58 GiB (3.50 BPW) + +hellaswag_score : loaded 10042 tasks from prompt. +hellaswag_score : selecting 750 randomized tasks. +hellaswag_score : calculating hellaswag score over selected tasks. 
+ +750 79.33333333% [76.2895%, 82.0782%] diff --git a/scores/granite-4.1-8b-q3_k.mmlu b/scores/granite-4.1-8b-q3_k.mmlu new file mode 100644 index 0000000..55d3f85 --- /dev/null +++ b/scores/granite-4.1-8b-q3_k.mmlu @@ -0,0 +1,20 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 1 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type q4_K: 33 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 2 tensors +llama_model_loader: - type iq3_xxs: 31 tensors +llama_model_loader: - type iq3_s: 165 tensors +llama_model_loader: - type iq4_xs: 48 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ4_XS - 4.25 bpw +print_info: file size = 3.58 GiB (3.50 BPW) + +multiple_choice_score: there are 5362 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 5362 tasks available +multiple_choice_score : calculating MMLU-Redux score over 750 tasks. 
+ +Final result: 69.2000 +/- 1.6869 +Random chance: 25.0000 +/- 1.5822 diff --git a/scores/granite-4.1-8b-q3_k.ppx b/scores/granite-4.1-8b-q3_k.ppx new file mode 100644 index 0000000..6444303 --- /dev/null +++ b/scores/granite-4.1-8b-q3_k.ppx @@ -0,0 +1,54 @@ +Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264 +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 1 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type q4_K: 33 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 2 tensors +llama_model_loader: - type iq3_xxs: 31 tensors +llama_model_loader: - type iq3_s: 165 tensors +llama_model_loader: - type iq4_xs: 48 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ4_XS - 4.25 bpw +print_info: file size = 3.58 GiB (3.50 BPW) + +====== Perplexity statistics ====== +Mean PPL(Q) : 9.381594 ± 0.070128 +Mean PPL(base) : 8.691178 ± 0.065443 +Cor(ln(PPL(Q)), ln(PPL(base))): 96.18% +Mean ln(PPL(Q)/PPL(base)) : 0.076441 ± 0.002075 +Mean PPL(Q)/PPL(base) : 1.079439 ± 0.002240 +Mean PPL(Q)-PPL(base) : 0.690416 ± 0.019310 + +====== KL divergence statistics ====== +Mean KLD: 0.173887 ± 0.001079 +Maximum KLD: 14.743266 +99.9% KLD: 5.111659 +99.0% KLD: 1.928733 +95.0% KLD: 0.623000 +90.0% KLD: 0.364871 +Median KLD: 0.072451 +10.0% KLD: 0.001391 + 5.0% KLD: 0.000306 + 1.0% KLD: 0.000027 + 0.1% KLD: 0.000001 +Minimum KLD: -0.000004 + +====== Token probability statistics ====== +Mean Δp: -2.503 ± 0.031 % +Maximum Δp: 96.204% +99.9% Δp: 57.084% +99.0% Δp: 26.183% +95.0% Δp: 10.344% +90.0% Δp: 5.013% +75.0% Δp: 0.347% +Median Δp: -0.138% +25.0% Δp: -3.676% +10.0% Δp: -12.687% + 5.0% Δp: -21.160% + 1.0% Δp: -51.918% + 0.1% Δp: -92.971% +Minimum Δp: -99.997% +RMS Δp : 12.185 ± 0.069 % +Same top p: 
82.732 ± 0.100 % diff --git a/scores/granite-4.1-8b-q3_k.tqa b/scores/granite-4.1-8b-q3_k.tqa new file mode 100644 index 0000000..642c2e6 --- /dev/null +++ b/scores/granite-4.1-8b-q3_k.tqa @@ -0,0 +1,20 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 1 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type q4_K: 33 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 2 tensors +llama_model_loader: - type iq3_xxs: 31 tensors +llama_model_loader: - type iq3_s: 165 tensors +llama_model_loader: - type iq4_xs: 48 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ4_XS - 4.25 bpw +print_info: file size = 3.58 GiB (3.50 BPW) + +multiple_choice_score: there are 817 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 817 tasks available +multiple_choice_score : calculating TruthfulQA score over 750 tasks. 
+ +Final result: 39.6000 +/- 1.7870 +Random chance: 19.8992 +/- 1.4588 diff --git a/scores/granite-4.1-8b-q3_k.wng b/scores/granite-4.1-8b-q3_k.wng new file mode 100644 index 0000000..2362b73 --- /dev/null +++ b/scores/granite-4.1-8b-q3_k.wng @@ -0,0 +1,18 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q3_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q2_K: 1 tensors +llama_model_loader: - type q3_K: 1 tensors +llama_model_loader: - type q4_K: 33 tensors +llama_model_loader: - type iq2_xxs: 1 tensors +llama_model_loader: - type iq2_xs: 2 tensors +llama_model_loader: - type iq3_xxs: 31 tensors +llama_model_loader: - type iq3_s: 165 tensors +llama_model_loader: - type iq4_xs: 48 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = IQ4_XS - 4.25 bpw +print_info: file size = 3.58 GiB (3.50 BPW) + +winogrande_score : loaded 1266 tasks from prompt. +winogrande_score : selecting 750 random tasks + +Final Winogrande score(750 tasks): 71.7333 +/- 1.6453 diff --git a/scores/granite-4.1-8b-q4_k.arc b/scores/granite-4.1-8b-q4_k.arc new file mode 100644 index 0000000..9bc8e0a --- /dev/null +++ b/scores/granite-4.1-8b-q4_k.arc @@ -0,0 +1,17 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q4_K: 90 tensors +llama_model_loader: - type q5_K: 79 tensors +llama_model_loader: - type iq3_xxs: 2 tensors +llama_model_loader: - type iq3_s: 1 tensors +llama_model_loader: - type iq4_xs: 110 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q4_K - Medium +print_info: file size = 4.61 GiB (4.50 BPW) + +multiple_choice_score: there are 869 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 869 tasks available +multiple_choice_score : calculating ARC 
Challenge score over 750 tasks. + +Final result: 66.9333 +/- 1.7190 +Random chance: 25.0083 +/- 1.5824 diff --git a/scores/granite-4.1-8b-q4_k.gpqa b/scores/granite-4.1-8b-q4_k.gpqa new file mode 100644 index 0000000..5fc4023 --- /dev/null +++ b/scores/granite-4.1-8b-q4_k.gpqa @@ -0,0 +1,17 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q4_K: 90 tensors +llama_model_loader: - type q5_K: 79 tensors +llama_model_loader: - type iq3_xxs: 2 tensors +llama_model_loader: - type iq3_s: 1 tensors +llama_model_loader: - type iq4_xs: 110 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q4_K - Medium +print_info: file size = 4.61 GiB (4.50 BPW) + +multiple_choice_score: there are 198 tasks in prompt +multiple_choice_score: reading tasks......................................................................................................................................................................................................done +multiple_choice_score : calculating GPQA-Diamond score over 198 tasks. 
+ +Final result: 23.2323 +/- 3.0089 +Random chance: 24.5963 +/- 3.0683 diff --git a/scores/granite-4.1-8b-q4_k.hsw b/scores/granite-4.1-8b-q4_k.hsw new file mode 100644 index 0000000..b4b4dc4 --- /dev/null +++ b/scores/granite-4.1-8b-q4_k.hsw @@ -0,0 +1,16 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q4_K: 90 tensors +llama_model_loader: - type q5_K: 79 tensors +llama_model_loader: - type iq3_xxs: 2 tensors +llama_model_loader: - type iq3_s: 1 tensors +llama_model_loader: - type iq4_xs: 110 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q4_K - Medium +print_info: file size = 4.61 GiB (4.50 BPW) + +hellaswag_score : loaded 10042 tasks from prompt. +hellaswag_score : selecting 750 randomized tasks. +hellaswag_score : calculating hellaswag score over selected tasks. + +750 79.73333333% [76.7082%, 82.4554%] diff --git a/scores/granite-4.1-8b-q4_k.mmlu b/scores/granite-4.1-8b-q4_k.mmlu new file mode 100644 index 0000000..4669798 --- /dev/null +++ b/scores/granite-4.1-8b-q4_k.mmlu @@ -0,0 +1,17 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q4_K: 90 tensors +llama_model_loader: - type q5_K: 79 tensors +llama_model_loader: - type iq3_xxs: 2 tensors +llama_model_loader: - type iq3_s: 1 tensors +llama_model_loader: - type iq4_xs: 110 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q4_K - Medium +print_info: file size = 4.61 GiB (4.50 BPW) + +multiple_choice_score: there are 5362 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 5362 tasks available +multiple_choice_score : calculating MMLU-Redux score over 750 tasks. 
+ +Final result: 71.4667 +/- 1.6500 +Random chance: 25.0000 +/- 1.5822 diff --git a/scores/granite-4.1-8b-q4_k.ppx b/scores/granite-4.1-8b-q4_k.ppx new file mode 100644 index 0000000..608692f --- /dev/null +++ b/scores/granite-4.1-8b-q4_k.ppx @@ -0,0 +1,51 @@ +Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264 +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q4_K: 90 tensors +llama_model_loader: - type q5_K: 79 tensors +llama_model_loader: - type iq3_xxs: 2 tensors +llama_model_loader: - type iq3_s: 1 tensors +llama_model_loader: - type iq4_xs: 110 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q4_K - Medium +print_info: file size = 4.61 GiB (4.50 BPW) + +====== Perplexity statistics ====== +Mean PPL(Q) : 8.867438 ± 0.067303 +Mean PPL(base) : 8.691178 ± 0.065443 +Cor(ln(PPL(Q)), ln(PPL(base))): 98.88% +Mean ln(PPL(Q)/PPL(base)) : 0.020077 ± 0.001134 +Mean PPL(Q)/PPL(base) : 1.020280 ± 0.001157 +Mean PPL(Q)-PPL(base) : 0.176260 ± 0.010114 + +====== KL divergence statistics ====== +Mean KLD: 0.047917 ± 0.000392 +Maximum KLD: 8.513770 +99.9% KLD: 1.939411 +99.0% KLD: 0.563371 +95.0% KLD: 0.167563 +90.0% KLD: 0.095562 +Median KLD: 0.017061 +10.0% KLD: 0.000210 + 5.0% KLD: 0.000043 + 1.0% KLD: 0.000003 + 0.1% KLD: -0.000001 +Minimum KLD: -0.000004 + +====== Token probability statistics ====== +Mean Δp: -0.365 ± 0.017 % +Maximum Δp: 95.922% +99.9% Δp: 43.653% +99.0% Δp: 16.925% +95.0% Δp: 6.946% +90.0% Δp: 3.742% +75.0% Δp: 0.557% +Median Δp: -0.002% +25.0% Δp: -0.901% +10.0% Δp: -4.570% + 5.0% Δp: -8.319% + 1.0% Δp: -22.260% + 0.1% Δp: -58.978% +Minimum Δp: -98.363% +RMS Δp : 6.446 ± 0.053 % +Same top p: 90.937 ± 0.076 % diff --git a/scores/granite-4.1-8b-q4_k.tqa b/scores/granite-4.1-8b-q4_k.tqa new file mode 100644 index 0000000..a390d93 --- 
/dev/null +++ b/scores/granite-4.1-8b-q4_k.tqa @@ -0,0 +1,17 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q4_K: 90 tensors +llama_model_loader: - type q5_K: 79 tensors +llama_model_loader: - type iq3_xxs: 2 tensors +llama_model_loader: - type iq3_s: 1 tensors +llama_model_loader: - type iq4_xs: 110 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q4_K - Medium +print_info: file size = 4.61 GiB (4.50 BPW) + +multiple_choice_score: there are 817 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 817 tasks available +multiple_choice_score : calculating TruthfulQA score over 750 tasks. + +Final result: 38.9333 +/- 1.7816 +Random chance: 19.8992 +/- 1.4588 diff --git a/scores/granite-4.1-8b-q4_k.wng b/scores/granite-4.1-8b-q4_k.wng new file mode 100644 index 0000000..b161cee --- /dev/null +++ b/scores/granite-4.1-8b-q4_k.wng @@ -0,0 +1,15 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q4_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q4_K: 90 tensors +llama_model_loader: - type q5_K: 79 tensors +llama_model_loader: - type iq3_xxs: 2 tensors +llama_model_loader: - type iq3_s: 1 tensors +llama_model_loader: - type iq4_xs: 110 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q4_K - Medium +print_info: file size = 4.61 GiB (4.50 BPW) + +winogrande_score : loaded 1266 tasks from prompt. 
+winogrande_score : selecting 750 random tasks + +Final Winogrande score(750 tasks): 73.4667 +/- 1.6132 diff --git a/scores/granite-4.1-8b-q5_k.arc b/scores/granite-4.1-8b-q5_k.arc new file mode 100644 index 0000000..94bd825 --- /dev/null +++ b/scores/granite-4.1-8b-q5_k.arc @@ -0,0 +1,17 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 1 tensors +llama_model_loader: - type q4_K: 26 tensors +llama_model_loader: - type q5_K: 246 tensors +llama_model_loader: - type q6_K: 4 tensors +llama_model_loader: - type iq4_xs: 5 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q5_K - Medium +print_info: file size = 5.63 GiB (5.50 BPW) + +multiple_choice_score: there are 869 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 869 tasks available +multiple_choice_score : calculating ARC Challenge score over 750 tasks. 
+ +Final result: 66.4000 +/- 1.7259 +Random chance: 25.0083 +/- 1.5824 diff --git a/scores/granite-4.1-8b-q5_k.gpqa b/scores/granite-4.1-8b-q5_k.gpqa new file mode 100644 index 0000000..44010ed --- /dev/null +++ b/scores/granite-4.1-8b-q5_k.gpqa @@ -0,0 +1,17 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 1 tensors +llama_model_loader: - type q4_K: 26 tensors +llama_model_loader: - type q5_K: 246 tensors +llama_model_loader: - type q6_K: 4 tensors +llama_model_loader: - type iq4_xs: 5 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q5_K - Medium +print_info: file size = 5.63 GiB (5.50 BPW) + +multiple_choice_score: there are 198 tasks in prompt +multiple_choice_score: reading tasks......................................................................................................................................................................................................done +multiple_choice_score : calculating GPQA-Diamond score over 198 tasks. 
+ +Final result: 22.7273 +/- 2.9858 +Random chance: 24.5963 +/- 3.0683 diff --git a/scores/granite-4.1-8b-q5_k.hsw b/scores/granite-4.1-8b-q5_k.hsw new file mode 100644 index 0000000..fd7f22f --- /dev/null +++ b/scores/granite-4.1-8b-q5_k.hsw @@ -0,0 +1,16 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 1 tensors +llama_model_loader: - type q4_K: 26 tensors +llama_model_loader: - type q5_K: 246 tensors +llama_model_loader: - type q6_K: 4 tensors +llama_model_loader: - type iq4_xs: 5 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q5_K - Medium +print_info: file size = 5.63 GiB (5.50 BPW) + +hellaswag_score : loaded 10042 tasks from prompt. +hellaswag_score : selecting 750 randomized tasks. +hellaswag_score : calculating hellaswag score over selected tasks. + +750 79.86666667% [76.8479%, 82.5810%] diff --git a/scores/granite-4.1-8b-q5_k.mmlu b/scores/granite-4.1-8b-q5_k.mmlu new file mode 100644 index 0000000..7d081e7 --- /dev/null +++ b/scores/granite-4.1-8b-q5_k.mmlu @@ -0,0 +1,17 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 1 tensors +llama_model_loader: - type q4_K: 26 tensors +llama_model_loader: - type q5_K: 246 tensors +llama_model_loader: - type q6_K: 4 tensors +llama_model_loader: - type iq4_xs: 5 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q5_K - Medium +print_info: file size = 5.63 GiB (5.50 BPW) + +multiple_choice_score: there are 5362 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 5362 tasks available +multiple_choice_score : calculating MMLU-Redux score over 750 tasks. 
+ +Final result: 72.1333 +/- 1.6382 +Random chance: 25.0000 +/- 1.5822 diff --git a/scores/granite-4.1-8b-q5_k.ppx b/scores/granite-4.1-8b-q5_k.ppx new file mode 100644 index 0000000..d538c41 --- /dev/null +++ b/scores/granite-4.1-8b-q5_k.ppx @@ -0,0 +1,51 @@ +Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264 +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 1 tensors +llama_model_loader: - type q4_K: 26 tensors +llama_model_loader: - type q5_K: 246 tensors +llama_model_loader: - type q6_K: 4 tensors +llama_model_loader: - type iq4_xs: 5 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q5_K - Medium +print_info: file size = 5.63 GiB (5.50 BPW) + +====== Perplexity statistics ====== +Mean PPL(Q) : 8.766150 ± 0.066421 +Mean PPL(base) : 8.691178 ± 0.065443 +Cor(ln(PPL(Q)), ln(PPL(base))): 99.48% +Mean ln(PPL(Q)/PPL(base)) : 0.008589 ± 0.000770 +Mean PPL(Q)/PPL(base) : 1.008626 ± 0.000776 +Mean PPL(Q)-PPL(base) : 0.074972 ± 0.006775 + +====== KL divergence statistics ====== +Mean KLD: 0.018940 ± 0.000165 +Maximum KLD: 5.148437 +99.9% KLD: 0.762867 +99.0% KLD: 0.213410 +95.0% KLD: 0.065414 +90.0% KLD: 0.037975 +Median KLD: 0.006922 +10.0% KLD: 0.000082 + 5.0% KLD: 0.000017 + 1.0% KLD: 0.000001 + 0.1% KLD: -0.000003 +Minimum KLD: -0.000006 + +====== Token probability statistics ====== +Mean Δp: -0.182 ± 0.011 % +Maximum Δp: 91.879% +99.9% Δp: 30.364% +99.0% Δp: 10.955% +95.0% Δp: 4.408% +90.0% Δp: 2.363% +75.0% Δp: 0.367% +Median Δp: -0.001% +25.0% Δp: -0.575% +10.0% Δp: -2.918% + 5.0% Δp: -5.194% + 1.0% Δp: -12.983% + 0.1% Δp: -33.902% +Minimum Δp: -90.554% +RMS Δp : 4.061 ± 0.040 % +Same top p: 94.120 ± 0.062 % diff --git a/scores/granite-4.1-8b-q5_k.tqa b/scores/granite-4.1-8b-q5_k.tqa new file mode 100644 index 0000000..6919fda --- /dev/null 
+++ b/scores/granite-4.1-8b-q5_k.tqa @@ -0,0 +1,17 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 1 tensors +llama_model_loader: - type q4_K: 26 tensors +llama_model_loader: - type q5_K: 246 tensors +llama_model_loader: - type q6_K: 4 tensors +llama_model_loader: - type iq4_xs: 5 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q5_K - Medium +print_info: file size = 5.63 GiB (5.50 BPW) + +multiple_choice_score: there are 817 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 817 tasks available +multiple_choice_score : calculating TruthfulQA score over 750 tasks. + +Final result: 38.5333 +/- 1.7783 +Random chance: 19.8992 +/- 1.4588 diff --git a/scores/granite-4.1-8b-q5_k.wng b/scores/granite-4.1-8b-q5_k.wng new file mode 100644 index 0000000..6fdde5e --- /dev/null +++ b/scores/granite-4.1-8b-q5_k.wng @@ -0,0 +1,15 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q5_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 1 tensors +llama_model_loader: - type q4_K: 26 tensors +llama_model_loader: - type q5_K: 246 tensors +llama_model_loader: - type q6_K: 4 tensors +llama_model_loader: - type iq4_xs: 5 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q5_K - Medium +print_info: file size = 5.63 GiB (5.50 BPW) + +winogrande_score : loaded 1266 tasks from prompt. 
+winogrande_score : selecting 750 random tasks + +Final Winogrande score(750 tasks): 73.4667 +/- 1.6132 diff --git a/scores/granite-4.1-8b-q6_k.arc b/scores/granite-4.1-8b-q6_k.arc new file mode 100644 index 0000000..cf0f3e4 --- /dev/null +++ b/scores/granite-4.1-8b-q6_k.arc @@ -0,0 +1,16 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q8_0: 35 tensors +llama_model_loader: - type q4_K: 2 tensors +llama_model_loader: - type q5_K: 75 tensors +llama_model_loader: - type q6_K: 170 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q6_K +print_info: file size = 6.65 GiB (6.50 BPW) + +multiple_choice_score: there are 869 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 869 tasks available +multiple_choice_score : calculating ARC Challenge score over 750 tasks. + +Final result: 67.0667 +/- 1.7172 +Random chance: 25.0083 +/- 1.5824 diff --git a/scores/granite-4.1-8b-q6_k.gpqa b/scores/granite-4.1-8b-q6_k.gpqa new file mode 100644 index 0000000..f1f0c07 --- /dev/null +++ b/scores/granite-4.1-8b-q6_k.gpqa @@ -0,0 +1,16 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q8_0: 35 tensors +llama_model_loader: - type q4_K: 2 tensors +llama_model_loader: - type q5_K: 75 tensors +llama_model_loader: - type q6_K: 170 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q6_K +print_info: file size = 6.65 GiB (6.50 BPW) + +multiple_choice_score: there are 198 tasks in prompt +multiple_choice_score: reading 
tasks......................................................................................................................................................................................................done +multiple_choice_score : calculating GPQA-Diamond score over 198 tasks. + +Final result: 24.7475 +/- 3.0746 +Random chance: 24.5963 +/- 3.0683 diff --git a/scores/granite-4.1-8b-q6_k.hsw b/scores/granite-4.1-8b-q6_k.hsw new file mode 100644 index 0000000..2b98dcc --- /dev/null +++ b/scores/granite-4.1-8b-q6_k.hsw @@ -0,0 +1,15 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q8_0: 35 tensors +llama_model_loader: - type q4_K: 2 tensors +llama_model_loader: - type q5_K: 75 tensors +llama_model_loader: - type q6_K: 170 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q6_K +print_info: file size = 6.65 GiB (6.50 BPW) + +hellaswag_score : loaded 10042 tasks from prompt. +hellaswag_score : selecting 750 randomized tasks. +hellaswag_score : calculating hellaswag score over selected tasks. 
+ +750 80.13333333% [77.1274%, 82.8322%] diff --git a/scores/granite-4.1-8b-q6_k.mmlu b/scores/granite-4.1-8b-q6_k.mmlu new file mode 100644 index 0000000..7c3946a --- /dev/null +++ b/scores/granite-4.1-8b-q6_k.mmlu @@ -0,0 +1,16 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q8_0: 35 tensors +llama_model_loader: - type q4_K: 2 tensors +llama_model_loader: - type q5_K: 75 tensors +llama_model_loader: - type q6_K: 170 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q6_K +print_info: file size = 6.65 GiB (6.50 BPW) + +multiple_choice_score: there are 5362 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 5362 tasks available +multiple_choice_score : calculating MMLU-Redux score over 750 tasks. + +Final result: 72.6667 +/- 1.6284 +Random chance: 25.0000 +/- 1.5822 diff --git a/scores/granite-4.1-8b-q6_k.ppx b/scores/granite-4.1-8b-q6_k.ppx new file mode 100644 index 0000000..ac5f75c --- /dev/null +++ b/scores/granite-4.1-8b-q6_k.ppx @@ -0,0 +1,50 @@ +Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264 +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q8_0: 35 tensors +llama_model_loader: - type q4_K: 2 tensors +llama_model_loader: - type q5_K: 75 tensors +llama_model_loader: - type q6_K: 170 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q6_K +print_info: file size = 6.65 GiB (6.50 BPW) + +====== Perplexity statistics ====== +Mean PPL(Q) : 8.755199 ± 0.066400 +Mean PPL(base) : 8.691178 ± 0.065443 +Cor(ln(PPL(Q)), ln(PPL(base))): 99.74% +Mean ln(PPL(Q)/PPL(base)) : 0.007339 ± 0.000550 +Mean PPL(Q)/PPL(base) : 1.007366 ± 0.000554 +Mean 
PPL(Q)-PPL(base) : 0.064021 ± 0.004871 + +====== KL divergence statistics ====== +Mean KLD: 0.007326 ± 0.000066 +Maximum KLD: 2.273445 +99.9% KLD: 0.273010 +99.0% KLD: 0.081142 +95.0% KLD: 0.025136 +90.0% KLD: 0.014795 +Median KLD: 0.002831 +10.0% KLD: 0.000033 + 5.0% KLD: 0.000007 + 1.0% KLD: 0.000000 + 0.1% KLD: -0.000003 +Minimum KLD: -0.000027 + +====== Token probability statistics ====== +Mean Δp: -0.107 ± 0.007 % +Maximum Δp: 79.111% +99.9% Δp: 18.154% +99.0% Δp: 7.012% +95.0% Δp: 2.856% +90.0% Δp: 1.538% +75.0% Δp: 0.232% +Median Δp: -0.001% +25.0% Δp: -0.371% +10.0% Δp: -1.892% + 5.0% Δp: -3.299% + 1.0% Δp: -7.834% + 0.1% Δp: -21.254% +Minimum Δp: -87.374% +RMS Δp : 2.562 ± 0.030 % +Same top p: 96.165 ± 0.051 % diff --git a/scores/granite-4.1-8b-q6_k.tqa b/scores/granite-4.1-8b-q6_k.tqa new file mode 100644 index 0000000..b409e4e --- /dev/null +++ b/scores/granite-4.1-8b-q6_k.tqa @@ -0,0 +1,16 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q8_0: 35 tensors +llama_model_loader: - type q4_K: 2 tensors +llama_model_loader: - type q5_K: 75 tensors +llama_model_loader: - type q6_K: 170 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q6_K +print_info: file size = 6.65 GiB (6.50 BPW) + +multiple_choice_score: there are 817 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 817 tasks available +multiple_choice_score : calculating TruthfulQA score over 750 tasks. 
+ +Final result: 38.2667 +/- 1.7759 +Random chance: 19.8992 +/- 1.4588 diff --git a/scores/granite-4.1-8b-q6_k.wng b/scores/granite-4.1-8b-q6_k.wng new file mode 100644 index 0000000..c2688f1 --- /dev/null +++ b/scores/granite-4.1-8b-q6_k.wng @@ -0,0 +1,14 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q6_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q8_0: 35 tensors +llama_model_loader: - type q4_K: 2 tensors +llama_model_loader: - type q5_K: 75 tensors +llama_model_loader: - type q6_K: 170 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q6_K +print_info: file size = 6.65 GiB (6.50 BPW) + +winogrande_score : loaded 1266 tasks from prompt. +winogrande_score : selecting 750 random tasks + +Final Winogrande score(750 tasks): 73.7333 +/- 1.6080 diff --git a/scores/granite-4.1-8b-q7_k.arc b/scores/granite-4.1-8b-q7_k.arc new file mode 100644 index 0000000..05da546 --- /dev/null +++ b/scores/granite-4.1-8b-q7_k.arc @@ -0,0 +1,15 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 4 tensors +llama_model_loader: - type q8_0: 146 tensors +llama_model_loader: - type q6_K: 132 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 7.68 GiB (7.50 BPW) + +multiple_choice_score: there are 869 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 869 tasks available +multiple_choice_score : calculating ARC Challenge score over 750 tasks. 
+ +Final result: 66.4000 +/- 1.7259 +Random chance: 25.0083 +/- 1.5824 diff --git a/scores/granite-4.1-8b-q7_k.gpqa b/scores/granite-4.1-8b-q7_k.gpqa new file mode 100644 index 0000000..b2886c4 --- /dev/null +++ b/scores/granite-4.1-8b-q7_k.gpqa @@ -0,0 +1,15 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 4 tensors +llama_model_loader: - type q8_0: 146 tensors +llama_model_loader: - type q6_K: 132 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 7.68 GiB (7.50 BPW) + +multiple_choice_score: there are 198 tasks in prompt +multiple_choice_score: reading tasks......................................................................................................................................................................................................done +multiple_choice_score : calculating GPQA-Diamond score over 198 tasks. + +Final result: 26.7677 +/- 3.1544 +Random chance: 24.5963 +/- 3.0683 diff --git a/scores/granite-4.1-8b-q7_k.hsw b/scores/granite-4.1-8b-q7_k.hsw new file mode 100644 index 0000000..96f49db --- /dev/null +++ b/scores/granite-4.1-8b-q7_k.hsw @@ -0,0 +1,14 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 4 tensors +llama_model_loader: - type q8_0: 146 tensors +llama_model_loader: - type q6_K: 132 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 7.68 GiB (7.50 BPW) + +hellaswag_score : loaded 10042 tasks from prompt. +hellaswag_score : selecting 750 randomized tasks. +hellaswag_score : calculating hellaswag score over selected tasks. 
+ +750 80.26666667% [77.2672%, 82.9576%] diff --git a/scores/granite-4.1-8b-q7_k.mmlu b/scores/granite-4.1-8b-q7_k.mmlu new file mode 100644 index 0000000..5781e09 --- /dev/null +++ b/scores/granite-4.1-8b-q7_k.mmlu @@ -0,0 +1,15 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 4 tensors +llama_model_loader: - type q8_0: 146 tensors +llama_model_loader: - type q6_K: 132 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 7.68 GiB (7.50 BPW) + +multiple_choice_score: there are 5362 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 5362 tasks available +multiple_choice_score : calculating MMLU-Redux score over 750 tasks. + +Final result: 72.1333 +/- 1.6382 +Random chance: 25.0000 +/- 1.5822 diff --git a/scores/granite-4.1-8b-q7_k.ppx b/scores/granite-4.1-8b-q7_k.ppx new file mode 100644 index 0000000..83ab15b --- /dev/null +++ b/scores/granite-4.1-8b-q7_k.ppx @@ -0,0 +1,49 @@ +Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264 +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 4 tensors +llama_model_loader: - type q8_0: 146 tensors +llama_model_loader: - type q6_K: 132 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 7.68 GiB (7.50 BPW) + +====== Perplexity statistics ====== +Mean PPL(Q) : 8.751241 ± 0.066500 +Mean PPL(base) : 8.691178 ± 0.065443 +Cor(ln(PPL(Q)), ln(PPL(base))): 99.82% +Mean ln(PPL(Q)/PPL(base)) : 0.006887 ± 0.000464 +Mean PPL(Q)/PPL(base) : 1.006911 ± 0.000467 +Mean PPL(Q)-PPL(base) : 0.060063 ± 0.004141 + +====== KL divergence statistics ====== +Mean KLD: 
0.003568 ± 0.000040 +Maximum KLD: 2.888946 +99.9% KLD: 0.140997 +99.0% KLD: 0.037132 +95.0% KLD: 0.011608 +90.0% KLD: 0.006961 +Median KLD: 0.001456 +10.0% KLD: 0.000015 + 5.0% KLD: 0.000003 + 1.0% KLD: -0.000000 + 0.1% KLD: -0.000004 +Minimum KLD: -0.000012 + +====== Token probability statistics ====== +Mean Δp: -0.007 ± 0.005 % +Maximum Δp: 81.280% +99.9% Δp: 12.574% +99.0% Δp: 4.968% +95.0% Δp: 2.242% +90.0% Δp: 1.259% +75.0% Δp: 0.222% +Median Δp: -0.000% +25.0% Δp: -0.211% +10.0% Δp: -1.245% + 5.0% Δp: -2.232% + 1.0% Δp: -5.277% + 0.1% Δp: -14.266% +Minimum Δp: -57.371% +RMS Δp : 1.812 ± 0.025 % +Same top p: 97.235 ± 0.043 % diff --git a/scores/granite-4.1-8b-q7_k.tqa b/scores/granite-4.1-8b-q7_k.tqa new file mode 100644 index 0000000..dd1c66d --- /dev/null +++ b/scores/granite-4.1-8b-q7_k.tqa @@ -0,0 +1,15 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 4 tensors +llama_model_loader: - type q8_0: 146 tensors +llama_model_loader: - type q6_K: 132 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 7.68 GiB (7.50 BPW) + +multiple_choice_score: there are 817 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 817 tasks available +multiple_choice_score : calculating TruthfulQA score over 750 tasks. 
+ +Final result: 38.5333 +/- 1.7783 +Random chance: 19.8992 +/- 1.4588 diff --git a/scores/granite-4.1-8b-q7_k.wng b/scores/granite-4.1-8b-q7_k.wng new file mode 100644 index 0000000..04eee15 --- /dev/null +++ b/scores/granite-4.1-8b-q7_k.wng @@ -0,0 +1,13 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q7_K.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 4 tensors +llama_model_loader: - type q8_0: 146 tensors +llama_model_loader: - type q6_K: 132 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 7.68 GiB (7.50 BPW) + +winogrande_score : loaded 1266 tasks from prompt. +winogrande_score : selecting 750 random tasks + +Final Winogrande score(750 tasks): 73.6000 +/- 1.6106 diff --git a/scores/granite-4.1-8b-q8_0.arc b/scores/granite-4.1-8b-q8_0.arc new file mode 100644 index 0000000..28b8faa --- /dev/null +++ b/scores/granite-4.1-8b-q8_0.arc @@ -0,0 +1,16 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 2 tensors +llama_model_loader: - type q8_0: 266 tensors +llama_model_loader: - type q6_K: 2 tensors +llama_model_loader: - type bf16: 12 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 8.70 GiB (8.50 BPW) + +multiple_choice_score: there are 869 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 869 tasks available +multiple_choice_score : calculating ARC Challenge score over 750 tasks. 
+ +Final result: 66.8000 +/- 1.7207 +Random chance: 25.0083 +/- 1.5824 diff --git a/scores/granite-4.1-8b-q8_0.gpqa b/scores/granite-4.1-8b-q8_0.gpqa new file mode 100644 index 0000000..a59cab4 --- /dev/null +++ b/scores/granite-4.1-8b-q8_0.gpqa @@ -0,0 +1,16 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 2 tensors +llama_model_loader: - type q8_0: 266 tensors +llama_model_loader: - type q6_K: 2 tensors +llama_model_loader: - type bf16: 12 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 8.70 GiB (8.50 BPW) + +multiple_choice_score: there are 198 tasks in prompt +multiple_choice_score: reading tasks......................................................................................................................................................................................................done +multiple_choice_score : calculating GPQA-Diamond score over 198 tasks. + +Final result: 26.7677 +/- 3.1544 +Random chance: 24.5963 +/- 3.0683 diff --git a/scores/granite-4.1-8b-q8_0.hsw b/scores/granite-4.1-8b-q8_0.hsw new file mode 100644 index 0000000..5a45a42 --- /dev/null +++ b/scores/granite-4.1-8b-q8_0.hsw @@ -0,0 +1,15 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 2 tensors +llama_model_loader: - type q8_0: 266 tensors +llama_model_loader: - type q6_K: 2 tensors +llama_model_loader: - type bf16: 12 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 8.70 GiB (8.50 BPW) + +hellaswag_score : loaded 10042 tasks from prompt. +hellaswag_score : selecting 750 randomized tasks. 
+hellaswag_score : calculating hellaswag score over selected tasks. + +750 80.53333333% [77.5470%, 83.2085%] diff --git a/scores/granite-4.1-8b-q8_0.mmlu b/scores/granite-4.1-8b-q8_0.mmlu new file mode 100644 index 0000000..5f07119 --- /dev/null +++ b/scores/granite-4.1-8b-q8_0.mmlu @@ -0,0 +1,16 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 2 tensors +llama_model_loader: - type q8_0: 266 tensors +llama_model_loader: - type q6_K: 2 tensors +llama_model_loader: - type bf16: 12 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 8.70 GiB (8.50 BPW) + +multiple_choice_score: there are 5362 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 5362 tasks available +multiple_choice_score : calculating MMLU-Redux score over 750 tasks. + +Final result: 72.4000 +/- 1.6334 +Random chance: 25.0000 +/- 1.5822 diff --git a/scores/granite-4.1-8b-q8_0.ppx b/scores/granite-4.1-8b-q8_0.ppx new file mode 100644 index 0000000..7ab851b --- /dev/null +++ b/scores/granite-4.1-8b-q8_0.ppx @@ -0,0 +1,50 @@ +Will perform strided perplexity calculation -> adjusting context size from 3072 to 3264 +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 2 tensors +llama_model_loader: - type q8_0: 266 tensors +llama_model_loader: - type q6_K: 2 tensors +llama_model_loader: - type bf16: 12 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 8.70 GiB (8.50 BPW) + +====== Perplexity statistics ====== +Mean PPL(Q) : 8.749119 ± 0.066517 +Mean PPL(base) : 8.691178 ± 0.065443 +Cor(ln(PPL(Q)), ln(PPL(base))): 99.85% +Mean ln(PPL(Q)/PPL(base)) : 0.006644 ± 
0.000418 +Mean PPL(Q)/PPL(base) : 1.006667 ± 0.000421 +Mean PPL(Q)-PPL(base) : 0.057941 ± 0.003751 + +====== KL divergence statistics ====== +Mean KLD: 0.002052 ± 0.000024 +Maximum KLD: 1.924665 +99.9% KLD: 0.070725 +99.0% KLD: 0.018533 +95.0% KLD: 0.006534 +90.0% KLD: 0.004148 +Median KLD: 0.000965 +10.0% KLD: 0.000008 + 5.0% KLD: 0.000002 + 1.0% KLD: -0.000001 + 0.1% KLD: -0.000004 +Minimum KLD: -0.000013 + +====== Token probability statistics ====== +Mean Δp: 0.031 ± 0.004 % +Maximum Δp: 67.287% +99.9% Δp: 9.257% +99.0% Δp: 4.075% +95.0% Δp: 1.929% +90.0% Δp: 1.101% +75.0% Δp: 0.201% +Median Δp: 0.000% +25.0% Δp: -0.151% +10.0% Δp: -0.973% + 5.0% Δp: -1.750% + 1.0% Δp: -3.980% + 0.1% Δp: -9.904% +Minimum Δp: -53.630% +RMS Δp : 1.396 ± 0.020 % +Same top p: 97.749 ± 0.039 % diff --git a/scores/granite-4.1-8b-q8_0.tqa b/scores/granite-4.1-8b-q8_0.tqa new file mode 100644 index 0000000..a56bee2 --- /dev/null +++ b/scores/granite-4.1-8b-q8_0.tqa @@ -0,0 +1,16 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 2 tensors +llama_model_loader: - type q8_0: 266 tensors +llama_model_loader: - type q6_K: 2 tensors +llama_model_loader: - type bf16: 12 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 8.70 GiB (8.50 BPW) + +multiple_choice_score: there are 817 tasks in prompt +multiple_choice_score: selecting 750 random tasks from 817 tasks available +multiple_choice_score : calculating TruthfulQA score over 750 tasks. 
+ +Final result: 38.4000 +/- 1.7771 +Random chance: 19.8992 +/- 1.4588 diff --git a/scores/granite-4.1-8b-q8_0.wng b/scores/granite-4.1-8b-q8_0.wng new file mode 100644 index 0000000..36b568b --- /dev/null +++ b/scores/granite-4.1-8b-q8_0.wng @@ -0,0 +1,14 @@ +llama_model_loader: loaded meta data with 39 key-value pairs and 363 tensors from granite-4.1-8b-Q8_0.gguf (version GGUF V3 (latest)) +llama_model_loader: - type f32: 81 tensors +llama_model_loader: - type q5_1: 2 tensors +llama_model_loader: - type q8_0: 266 tensors +llama_model_loader: - type q6_K: 2 tensors +llama_model_loader: - type bf16: 12 tensors +print_info: file format = GGUF V3 (latest) +print_info: file type = Q8_0 +print_info: file size = 8.70 GiB (8.50 BPW) + +winogrande_score : loaded 1266 tasks from prompt. +winogrande_score : selecting 750 random tasks + +Final Winogrande score(750 tasks): 73.2000 +/- 1.6184 diff --git a/scores/granite-4.1-8b.itx b/scores/granite-4.1-8b.itx new file mode 100644 index 0000000..ea0e149 --- /dev/null +++ b/scores/granite-4.1-8b.itx @@ -0,0 +1,332 @@ +ggml_cuda_init: found 1 CUDA devices (Total VRAM: 124610 MiB): + Device 0: NVIDIA GB10, compute capability 12.1, VMM: yes, VRAM: 124610 MiB + +Computing statistics for imatrix/imatrix-granite-4.1-8b-medium.gguf (280 tensors) + + Layer Tensor Σ(Act²) Min Max μ σ % Active N Entropy E (norm) ZD CosSim +========================================================================================================================================================================= + 39 attn_k 174643.00 2.0243 18914.7031 42.64 497.38 100.00% 4096 8.8904 74.09% 0.37% 0.8865 + 38 attn_k 166087.69 0.0000 20050.7402 40.55 448.81 99.98% 4096 8.6133 71.78% 0.46% 0.9835 + 37 attn_k 146614.73 0.7595 20756.5176 35.79 423.56 100.00% 4096 8.3834 69.86% 0.44% 0.9842 + 36 attn_k 134513.23 0.5200 19003.3398 32.84 386.52 100.00% 4096 8.2079 68.40% 0.51% 0.9501 + 35 attn_k 124035.38 1.3198 11533.5068 30.28 305.84 100.00% 4096 8.2914 69.09% 0.71% 
0.9665 + 34 attn_k 123896.47 0.5387 17763.3496 30.25 472.60 100.00% 4096 6.6833 55.69% 0.44% 0.9800 + 32 attn_k 111128.92 0.1590 25305.4961 27.13 504.76 100.00% 4096 6.2867 52.39% 0.37% 0.9931 + 30 attn_k 109705.93 0.0000 33505.8359 26.78 617.29 99.98% 4096 5.4138 45.11% 0.27% 0.9759 + 33 attn_k 109045.48 0.9963 20537.8262 26.62 448.68 100.00% 4096 6.6424 55.35% 0.37% 0.9914 + 29 attn_k 87316.21 0.0000 20308.4004 21.32 398.84 99.95% 4096 6.1991 51.66% 0.37% 0.9936 + 31 attn_k 87185.37 0.0000 22471.5254 21.29 421.05 99.98% 4096 6.2130 51.78% 0.34% 0.9963 + 28 attn_k 82016.98 0.0000 23024.2656 20.02 430.64 99.95% 4096 5.6830 47.36% 0.29% 0.9976 + 27 attn_k 76821.73 0.0000 17911.5156 18.76 348.43 99.98% 4096 6.3444 52.87% 0.34% 0.9791 + 16 attn_k 76133.66 0.0000 11891.1016 18.59 311.73 99.93% 4096 6.1208 51.01% 0.42% 0.9643 + 23 attn_k 74987.37 0.0000 18502.6172 18.31 356.15 99.95% 4096 6.1167 50.97% 0.39% 0.9980 + 22 attn_k 71226.52 0.0000 18282.5273 17.39 343.00 99.98% 4096 6.2861 52.38% 0.32% 0.9901 + 21 attn_k 69479.59 0.0000 14531.5439 16.96 297.70 99.95% 4096 6.6280 55.23% 0.39% 0.9882 + 20 attn_k 68286.74 0.0000 18747.7070 16.67 345.57 99.95% 4096 6.0506 50.42% 0.34% 0.9917 + 25 attn_k 68221.13 0.0000 14562.2783 16.66 294.34 99.98% 4096 6.3805 53.17% 0.39% 0.9829 + 7 attn_k 68092.71 0.0000 17005.3926 16.62 348.43 99.90% 4096 5.9826 49.85% 0.32% 0.9814 + 18 attn_k 66295.24 0.0000 12255.0391 16.19 262.31 99.95% 4096 6.9157 57.63% 0.42% 0.9978 + 26 attn_k 65291.02 0.0000 12545.3467 15.94 268.54 99.98% 4096 6.6067 55.06% 0.39% 0.9920 + 13 attn_k 59896.29 0.0000 8712.0469 14.62 232.75 99.90% 4096 6.9162 57.63% 0.32% 0.9786 + 24 attn_k 56921.47 0.0000 9241.3613 13.90 213.49 99.98% 4096 7.0602 58.84% 0.54% 0.9757 + 17 attn_k 56760.55 0.0000 10113.2490 13.86 223.49 99.95% 4096 6.7278 56.06% 0.44% 0.9569 + 15 attn_k 55194.11 0.0000 9697.7783 13.48 229.94 99.93% 4096 5.9460 49.55% 0.37% 0.9288 + 14 attn_k 54794.45 0.0000 8291.7021 13.38 212.09 99.90% 4096 7.0371 58.64% 
0.37% 0.9885 + 6 attn_k 53044.36 0.0000 16721.8047 12.95 321.79 99.88% 4096 4.5291 37.74% 0.32% 0.9479 + 19 attn_k 52733.22 0.0000 11920.8223 12.87 233.97 99.95% 4096 6.5100 54.25% 0.39% 0.9902 + 8 attn_k 51986.04 0.0000 10429.2197 12.69 233.14 99.90% 4096 6.5500 54.58% 0.29% 0.9699 + 9 attn_k 49641.15 0.0000 10131.9502 12.12 241.91 99.90% 4096 5.8877 49.06% 0.34% 0.9950 + 12 attn_k 48607.36 0.0000 7781.5098 11.87 191.06 99.93% 4096 7.0521 58.77% 0.37% 0.9965 + 4 attn_k 46316.68 0.0000 16862.5684 11.31 309.35 99.85% 4096 5.2333 43.61% 0.22% 0.7166 + 5 attn_k 45620.26 0.0000 16766.0762 11.14 317.06 99.88% 4096 4.5011 37.51% 0.22% 0.9947 + 11 attn_k 45317.79 0.0000 6714.2949 11.06 172.61 99.93% 4096 7.2487 60.41% 0.37% 0.9866 + 10 attn_k 43706.77 0.0000 6960.2920 10.67 180.78 99.93% 4096 6.7438 56.20% 0.34% 0.9923 + 2 attn_k 19532.52 0.0000 6018.6182 4.77 99.81 99.90% 4096 7.6636 63.86% 0.20% 0.3212 + 3 attn_k 17324.65 0.0000 5779.3477 4.23 110.43 99.93% 4096 5.5684 46.40% 0.22% 0.9580 + 1 attn_k 15856.24 0.0000 5183.1831 3.87 86.07 98.66% 4096 6.6712 55.59% 0.22% 0.3483 + 0 attn_k 3049.93 0.0000 247.0233 0.74 7.67 97.14% 4096 6.8129 56.77% 1.12% 0.0000 + 39 attn_output 34614.42 1.0529 359.3260 8.45 11.78 100.00% 4096 11.3365 94.47% 7.59% 0.3770 + 38 attn_output 13420.36 0.2976 103.0346 3.28 4.13 100.00% 4096 11.3248 94.37% 8.98% 0.2804 + 37 attn_output 8831.00 0.2297 108.9257 2.16 3.37 100.00% 4096 11.2679 93.90% 5.49% 0.2283 + 36 attn_output 8013.26 0.1413 95.6638 1.96 3.93 100.00% 4096 11.0147 91.79% 4.32% 0.2457 + 35 attn_output 3675.13 0.1878 48.8232 0.90 1.65 100.00% 4096 11.2243 93.54% 4.52% 0.2357 + 34 attn_output 3483.17 0.0606 33.0821 0.85 1.59 100.00% 4096 11.1613 93.01% 3.96% 0.1202 + 33 attn_output 3199.42 0.0368 114.1278 0.78 2.50 100.00% 4096 10.6861 89.05% 3.47% 0.1774 + 31 attn_output 2234.35 0.0523 41.9673 0.55 1.30 100.00% 4096 10.8825 90.69% 5.00% 0.2217 + 1 attn_output 1997.94 0.0106 18.5997 0.49 0.98 100.00% 4096 10.5931 88.28% 10.03% 0.0200 + 2 
attn_output 1738.62 0.0199 8.3539 0.42 0.55 100.00% 4096 11.1150 92.63% 16.77% 0.3314 + 32 attn_output 1524.28 0.0124 13.4450 0.37 0.70 100.00% 4096 11.1955 93.30% 3.03% 0.1768 + 30 attn_output 1314.30 0.0875 9.0597 0.32 0.51 100.00% 4096 11.3404 94.50% 3.64% 0.2404 + 28 attn_output 1114.76 0.0367 12.3692 0.27 0.65 100.00% 4096 10.8974 90.81% 3.59% 0.1806 + 3 attn_output 1080.53 0.0156 5.2009 0.26 0.36 100.00% 4096 11.1964 93.30% 10.28% 0.2966 + 24 attn_output 914.61 0.0400 6.5156 0.22 0.32 100.00% 4096 11.3443 94.54% 6.69% 0.1864 + 23 attn_output 900.86 0.0061 16.3921 0.22 0.55 100.00% 4096 10.9512 91.26% 3.27% 0.1833 + 29 attn_output 868.15 0.0168 8.5233 0.21 0.45 100.00% 4096 11.0238 91.87% 4.83% 0.1848 + 27 attn_output 855.07 0.0081 8.6770 0.21 0.41 100.00% 4096 11.1371 92.81% 3.39% 0.2051 + 22 attn_output 812.40 0.0144 7.8408 0.20 0.37 100.00% 4096 11.1740 93.12% 3.88% 0.2016 + 17 attn_output 711.99 0.0085 4.7093 0.17 0.28 100.00% 4096 11.1963 93.30% 4.81% 0.2343 + 21 attn_output 709.29 0.0000 10.5542 0.17 0.37 100.00% 4096 10.9892 91.58% 4.57% 0.1817 + 19 attn_output 707.10 0.0186 6.2341 0.17 0.23 100.00% 4096 11.4981 95.82% 3.22% 0.2283 + 20 attn_output 703.72 0.0083 5.4485 0.17 0.28 100.00% 4096 11.2945 94.12% 3.93% 0.2992 + 25 attn_output 656.68 0.0052 10.7580 0.16 0.38 100.00% 4096 11.0354 91.96% 2.08% 0.2214 + 15 attn_output 646.34 0.0052 8.5272 0.16 0.29 100.00% 4096 11.0779 92.32% 6.45% 0.2046 + 16 attn_output 610.13 0.0001 6.2649 0.15 0.35 100.00% 4096 10.7437 89.53% 3.83% 0.1555 + 26 attn_output 609.93 0.0232 5.7634 0.15 0.28 100.00% 4096 11.2354 93.63% 3.52% 0.1915 + 13 attn_output 548.35 0.0033 4.0648 0.13 0.21 100.00% 4096 10.8651 90.54% 12.60% 0.4666 + 18 attn_output 516.94 0.0000 10.2987 0.13 0.28 99.76% 4096 11.0928 92.44% 2.69% 0.2541 + 4 attn_output 453.05 0.0051 3.9059 0.11 0.15 100.00% 4096 11.3247 94.37% 8.91% 0.3225 + 0 attn_output 447.61 0.0008 91.7077 0.11 1.72 100.00% 4096 6.9468 57.89% 0.73% 0.0000 + 10 attn_output 439.24 0.0036 
3.4716 0.11 0.26 100.00% 4096 10.4123 86.77% 7.06% 0.2021 + 11 attn_output 401.22 0.0051 1.9410 0.10 0.15 100.00% 4096 11.0397 92.00% 10.16% 0.1149 + 14 attn_output 350.10 0.0024 3.9673 0.09 0.14 100.00% 4096 11.0800 92.33% 6.81% 0.2386 + 5 attn_output 296.64 0.0028 3.5316 0.07 0.11 100.00% 4096 11.1694 93.08% 6.15% 0.2990 + 7 attn_output 240.15 0.0033 3.3802 0.06 0.11 100.00% 4096 11.1050 92.54% 5.96% 0.3540 + 6 attn_output 218.34 0.0015 1.1016 0.05 0.07 100.00% 4096 11.2492 93.74% 10.18% 0.2733 + 8 attn_output 208.74 0.0015 5.2777 0.05 0.12 100.00% 4096 10.9654 91.38% 5.47% 0.1650 + 12 attn_output 208.53 0.0032 2.3583 0.05 0.07 100.00% 4096 11.4232 95.19% 9.01% 0.3528 + 9 attn_output 151.65 0.0025 1.4596 0.04 0.05 100.00% 4096 11.3319 94.43% 6.32% 0.2434 + 39 attn_q 174643.00 2.0243 18914.7031 42.64 497.38 100.00% 4096 8.8904 74.09% 0.37% 0.8865 + 38 attn_q 166087.69 0.0000 20050.7402 40.55 448.81 99.98% 4096 8.6133 71.78% 0.46% 0.9835 + 37 attn_q 146614.73 0.7595 20756.5176 35.79 423.56 100.00% 4096 8.3834 69.86% 0.44% 0.9842 + 36 attn_q 134513.23 0.5200 19003.3398 32.84 386.52 100.00% 4096 8.2079 68.40% 0.51% 0.9501 + 35 attn_q 124035.38 1.3198 11533.5068 30.28 305.84 100.00% 4096 8.2914 69.09% 0.71% 0.9665 + 34 attn_q 123896.47 0.5387 17763.3496 30.25 472.60 100.00% 4096 6.6833 55.69% 0.44% 0.9800 + 32 attn_q 111128.92 0.1590 25305.4961 27.13 504.76 100.00% 4096 6.2867 52.39% 0.37% 0.9931 + 30 attn_q 109705.93 0.0000 33505.8359 26.78 617.29 99.98% 4096 5.4138 45.11% 0.27% 0.9759 + 33 attn_q 109045.48 0.9963 20537.8262 26.62 448.68 100.00% 4096 6.6424 55.35% 0.37% 0.9914 + 29 attn_q 87316.21 0.0000 20308.4004 21.32 398.84 99.95% 4096 6.1991 51.66% 0.37% 0.9936 + 31 attn_q 87185.37 0.0000 22471.5254 21.29 421.05 99.98% 4096 6.2130 51.78% 0.34% 0.9963 + 28 attn_q 82016.98 0.0000 23024.2656 20.02 430.64 99.95% 4096 5.6830 47.36% 0.29% 0.9976 + 27 attn_q 76821.73 0.0000 17911.5156 18.76 348.43 99.98% 4096 6.3444 52.87% 0.34% 0.9791 + 16 attn_q 76133.66 0.0000 
11891.1016 18.59 311.73 99.93% 4096 6.1208 51.01% 0.42% 0.9643 + 23 attn_q 74987.37 0.0000 18502.6172 18.31 356.15 99.95% 4096 6.1167 50.97% 0.39% 0.9980 + 22 attn_q 71226.52 0.0000 18282.5273 17.39 343.00 99.98% 4096 6.2861 52.38% 0.32% 0.9901 + 21 attn_q 69479.59 0.0000 14531.5439 16.96 297.70 99.95% 4096 6.6280 55.23% 0.39% 0.9882 + 20 attn_q 68286.74 0.0000 18747.7070 16.67 345.57 99.95% 4096 6.0506 50.42% 0.34% 0.9917 + 25 attn_q 68221.13 0.0000 14562.2783 16.66 294.34 99.98% 4096 6.3805 53.17% 0.39% 0.9829 + 7 attn_q 68092.71 0.0000 17005.3926 16.62 348.43 99.90% 4096 5.9826 49.85% 0.32% 0.9814 + 18 attn_q 66295.24 0.0000 12255.0391 16.19 262.31 99.95% 4096 6.9157 57.63% 0.42% 0.9978 + 26 attn_q 65291.02 0.0000 12545.3467 15.94 268.54 99.98% 4096 6.6067 55.06% 0.39% 0.9920 + 13 attn_q 59896.29 0.0000 8712.0469 14.62 232.75 99.90% 4096 6.9162 57.63% 0.32% 0.9786 + 24 attn_q 56921.47 0.0000 9241.3613 13.90 213.49 99.98% 4096 7.0602 58.84% 0.54% 0.9757 + 17 attn_q 56760.55 0.0000 10113.2490 13.86 223.49 99.95% 4096 6.7278 56.06% 0.44% 0.9569 + 15 attn_q 55194.11 0.0000 9697.7783 13.48 229.94 99.93% 4096 5.9460 49.55% 0.37% 0.9288 + 14 attn_q 54794.45 0.0000 8291.7021 13.38 212.09 99.90% 4096 7.0371 58.64% 0.37% 0.9885 + 6 attn_q 53044.36 0.0000 16721.8047 12.95 321.79 99.88% 4096 4.5291 37.74% 0.32% 0.9479 + 19 attn_q 52733.22 0.0000 11920.8223 12.87 233.97 99.95% 4096 6.5100 54.25% 0.39% 0.9902 + 8 attn_q 51986.04 0.0000 10429.2197 12.69 233.14 99.90% 4096 6.5500 54.58% 0.29% 0.9699 + 9 attn_q 49641.15 0.0000 10131.9502 12.12 241.91 99.90% 4096 5.8877 49.06% 0.34% 0.9950 + 12 attn_q 48607.36 0.0000 7781.5098 11.87 191.06 99.93% 4096 7.0521 58.77% 0.37% 0.9965 + 4 attn_q 46316.68 0.0000 16862.5684 11.31 309.35 99.85% 4096 5.2333 43.61% 0.22% 0.7166 + 5 attn_q 45620.26 0.0000 16766.0762 11.14 317.06 99.88% 4096 4.5011 37.51% 0.22% 0.9947 + 11 attn_q 45317.79 0.0000 6714.2949 11.06 172.61 99.93% 4096 7.2487 60.41% 0.37% 0.9866 + 10 attn_q 43706.77 0.0000 6960.2920 
10.67 180.78 99.93% 4096 6.7438 56.20% 0.34% 0.9923 + 2 attn_q 19532.52 0.0000 6018.6182 4.77 99.81 99.90% 4096 7.6636 63.86% 0.20% 0.3212 + 3 attn_q 17324.65 0.0000 5779.3477 4.23 110.43 99.93% 4096 5.5684 46.40% 0.22% 0.9580 + 1 attn_q 15856.24 0.0000 5183.1831 3.87 86.07 98.66% 4096 6.6712 55.59% 0.22% 0.3483 + 0 attn_q 3049.93 0.0000 247.0233 0.74 7.67 97.14% 4096 6.8129 56.77% 1.12% 0.0000 + 39 attn_v 174643.00 2.0243 18914.7031 42.64 497.38 100.00% 4096 8.8904 74.09% 0.37% 0.8865 + 38 attn_v 166087.69 0.0000 20050.7402 40.55 448.81 99.98% 4096 8.6133 71.78% 0.46% 0.9835 + 37 attn_v 146614.73 0.7595 20756.5176 35.79 423.56 100.00% 4096 8.3834 69.86% 0.44% 0.9842 + 36 attn_v 134513.23 0.5200 19003.3398 32.84 386.52 100.00% 4096 8.2079 68.40% 0.51% 0.9501 + 35 attn_v 124035.38 1.3198 11533.5068 30.28 305.84 100.00% 4096 8.2914 69.09% 0.71% 0.9665 + 34 attn_v 123896.47 0.5387 17763.3496 30.25 472.60 100.00% 4096 6.6833 55.69% 0.44% 0.9800 + 32 attn_v 111128.92 0.1590 25305.4961 27.13 504.76 100.00% 4096 6.2867 52.39% 0.37% 0.9931 + 30 attn_v 109705.93 0.0000 33505.8359 26.78 617.29 99.98% 4096 5.4138 45.11% 0.27% 0.9759 + 33 attn_v 109045.48 0.9963 20537.8262 26.62 448.68 100.00% 4096 6.6424 55.35% 0.37% 0.9914 + 29 attn_v 87316.21 0.0000 20308.4004 21.32 398.84 99.95% 4096 6.1991 51.66% 0.37% 0.9936 + 31 attn_v 87185.37 0.0000 22471.5254 21.29 421.05 99.98% 4096 6.2130 51.78% 0.34% 0.9963 + 28 attn_v 82016.98 0.0000 23024.2656 20.02 430.64 99.95% 4096 5.6830 47.36% 0.29% 0.9976 + 27 attn_v 76821.73 0.0000 17911.5156 18.76 348.43 99.98% 4096 6.3444 52.87% 0.34% 0.9791 + 16 attn_v 76133.66 0.0000 11891.1016 18.59 311.73 99.93% 4096 6.1208 51.01% 0.42% 0.9643 + 23 attn_v 74987.37 0.0000 18502.6172 18.31 356.15 99.95% 4096 6.1167 50.97% 0.39% 0.9980 + 22 attn_v 71226.52 0.0000 18282.5273 17.39 343.00 99.98% 4096 6.2861 52.38% 0.32% 0.9901 + 21 attn_v 69479.59 0.0000 14531.5439 16.96 297.70 99.95% 4096 6.6280 55.23% 0.39% 0.9882 + 20 attn_v 68286.74 0.0000 18747.7070 
16.67 345.57 99.95% 4096 6.0506 50.42% 0.34% 0.9917 + 25 attn_v 68221.13 0.0000 14562.2783 16.66 294.34 99.98% 4096 6.3805 53.17% 0.39% 0.9829 + 7 attn_v 68092.71 0.0000 17005.3926 16.62 348.43 99.90% 4096 5.9826 49.85% 0.32% 0.9814 + 18 attn_v 66295.24 0.0000 12255.0391 16.19 262.31 99.95% 4096 6.9157 57.63% 0.42% 0.9978 + 26 attn_v 65291.02 0.0000 12545.3467 15.94 268.54 99.98% 4096 6.6067 55.06% 0.39% 0.9920 + 13 attn_v 59896.29 0.0000 8712.0469 14.62 232.75 99.90% 4096 6.9162 57.63% 0.32% 0.9786 + 24 attn_v 56921.47 0.0000 9241.3613 13.90 213.49 99.98% 4096 7.0602 58.84% 0.54% 0.9757 + 17 attn_v 56760.55 0.0000 10113.2490 13.86 223.49 99.95% 4096 6.7278 56.06% 0.44% 0.9569 + 15 attn_v 55194.11 0.0000 9697.7783 13.48 229.94 99.93% 4096 5.9460 49.55% 0.37% 0.9288 + 14 attn_v 54794.45 0.0000 8291.7021 13.38 212.09 99.90% 4096 7.0371 58.64% 0.37% 0.9885 + 6 attn_v 53044.36 0.0000 16721.8047 12.95 321.79 99.88% 4096 4.5291 37.74% 0.32% 0.9479 + 19 attn_v 52733.22 0.0000 11920.8223 12.87 233.97 99.95% 4096 6.5100 54.25% 0.39% 0.9902 + 8 attn_v 51986.04 0.0000 10429.2197 12.69 233.14 99.90% 4096 6.5500 54.58% 0.29% 0.9699 + 9 attn_v 49641.15 0.0000 10131.9502 12.12 241.91 99.90% 4096 5.8877 49.06% 0.34% 0.9950 + 12 attn_v 48607.36 0.0000 7781.5098 11.87 191.06 99.93% 4096 7.0521 58.77% 0.37% 0.9965 + 4 attn_v 46316.68 0.0000 16862.5684 11.31 309.35 99.85% 4096 5.2333 43.61% 0.22% 0.7166 + 5 attn_v 45620.26 0.0000 16766.0762 11.14 317.06 99.88% 4096 4.5011 37.51% 0.22% 0.9947 + 11 attn_v 45317.79 0.0000 6714.2949 11.06 172.61 99.93% 4096 7.2487 60.41% 0.37% 0.9866 + 10 attn_v 43706.77 0.0000 6960.2920 10.67 180.78 99.93% 4096 6.7438 56.20% 0.34% 0.9923 + 2 attn_v 19532.52 0.0000 6018.6182 4.77 99.81 99.90% 4096 7.6636 63.86% 0.20% 0.3212 + 3 attn_v 17324.65 0.0000 5779.3477 4.23 110.43 99.93% 4096 5.5684 46.40% 0.22% 0.9580 + 1 attn_v 15856.24 0.0000 5183.1831 3.87 86.07 98.66% 4096 6.6712 55.59% 0.22% 0.3483 + 0 attn_v 3049.93 0.0000 247.0233 0.74 7.67 97.14% 4096 
6.8129 56.77% 1.12% 0.0000 + 39 ffn_down 1645556.38 0.0689 81837.1094 128.56 1422.92 100.00% 12800 9.8110 71.91% 1.00% 0.0173 + 5 ffn_down 381550.03 0.0094 210713.1406 29.81 2109.55 100.00% 12800 1.6182 11.86% 0.03% 0.0011 + 38 ffn_down 242713.44 0.2437 7073.1206 18.96 84.75 100.00% 12800 12.6848 92.97% 0.70% 0.0476 + 3 ffn_down 188951.92 0.0109 147793.5938 14.76 1332.36 100.00% 12800 1.2131 8.89% 0.03% 0.0026 + 37 ffn_down 142317.03 0.0870 5205.8745 11.12 52.73 100.00% 12800 12.6322 92.59% 0.67% 0.0329 + 36 ffn_down 83945.62 0.0217 2698.7646 6.56 37.12 100.00% 12800 11.9912 87.89% 1.06% 0.0443 + 35 ffn_down 42474.45 0.0859 952.5922 3.32 13.19 100.00% 12800 12.1797 89.27% 2.48% 0.0830 + 34 ffn_down 26102.06 0.0438 229.3722 2.04 5.52 100.00% 12800 12.2821 90.02% 4.28% 0.1049 + 33 ffn_down 12264.68 0.0564 152.2174 0.96 2.91 100.00% 12800 12.2756 89.97% 3.31% 0.0943 + 0 ffn_down 6042.44 0.0000 2911.0620 0.47 25.98 100.00% 12800 5.6061 41.09% 0.12% 0.0000 + 32 ffn_down 5920.96 0.0421 99.5476 0.46 1.42 100.00% 12800 12.4740 91.43% 3.20% 0.0558 + 31 ffn_down 3874.26 0.0193 185.5614 0.30 1.72 100.00% 12800 12.4092 90.95% 1.28% 0.0903 + 30 ffn_down 3486.52 0.0333 71.5827 0.27 0.87 100.00% 12800 12.5027 91.64% 2.80% 0.0724 + 29 ffn_down 2260.60 0.0105 59.7711 0.18 0.70 100.00% 12800 12.4988 91.61% 1.99% 0.0480 + 2 ffn_down 1941.90 0.0007 217.4457 0.15 1.95 100.00% 12800 11.9799 87.80% 0.16% 0.0440 + 28 ffn_down 1810.21 0.0343 70.6592 0.14 0.70 100.00% 12800 12.6709 92.87% 0.97% 0.0639 + 1 ffn_down 1559.32 0.0002 17.3429 0.12 0.21 100.00% 12800 13.0362 95.55% 5.21% 0.0058 + 27 ffn_down 1505.68 0.0334 16.9273 0.12 0.33 100.00% 12800 12.8432 94.13% 1.88% 0.0300 + 23 ffn_down 1423.83 0.0149 23.6004 0.11 0.42 100.00% 12800 12.5741 92.16% 1.63% 0.0554 + 24 ffn_down 1422.49 0.0148 57.0210 0.11 0.56 100.00% 12800 12.6908 93.01% 0.84% 0.0463 + 26 ffn_down 1401.04 0.0277 123.4429 0.11 1.10 100.00% 12800 12.4077 90.94% 0.29% 0.0151 + 25 ffn_down 1375.32 0.0282 80.1291 0.11 0.74 
100.00% 12800 12.6175 92.48% 0.48% 0.0268 + 22 ffn_down 1156.34 0.0120 34.4371 0.09 0.38 100.00% 12800 12.6976 93.06% 1.30% 0.0673 + 21 ffn_down 1138.10 0.0147 24.3792 0.09 0.28 100.00% 12800 12.7839 93.70% 2.28% 0.0947 + 20 ffn_down 1136.30 0.0127 21.3931 0.09 0.26 100.00% 12800 12.8313 94.04% 2.33% 0.1177 + 18 ffn_down 1119.47 0.0176 4.8772 0.09 0.14 100.00% 12800 12.9995 95.28% 5.52% 0.2902 + 19 ffn_down 1116.88 0.0151 20.6434 0.09 0.23 100.00% 12800 12.9391 94.83% 2.98% 0.1868 + 16 ffn_down 1105.64 0.0134 17.2072 0.09 0.19 100.00% 12800 13.0591 95.71% 3.31% 0.0832 + 17 ffn_down 1041.27 0.0162 9.5109 0.08 0.13 100.00% 12800 13.1452 96.34% 4.94% 0.2264 + 15 ffn_down 1024.12 0.0230 43.3770 0.08 0.40 100.00% 12800 12.7413 93.39% 1.24% 0.0284 + 6 ffn_down 1022.13 0.0078 352.7156 0.08 3.12 100.00% 12800 9.2787 68.01% 0.05% 0.0001 + 14 ffn_down 951.70 0.0253 49.8595 0.07 0.50 100.00% 12800 12.6100 92.42% 0.50% 0.0720 + 11 ffn_down 926.64 0.0218 12.4872 0.07 0.13 100.00% 12800 13.2980 97.47% 1.80% 0.2713 + 4 ffn_down 923.82 0.0080 22.2460 0.07 0.37 100.00% 12800 12.2201 89.57% 1.02% 0.0012 + 12 ffn_down 888.91 0.0240 7.9189 0.07 0.12 100.00% 12800 13.2283 96.95% 2.80% 0.2412 + 10 ffn_down 857.79 0.0211 5.1418 0.07 0.10 100.00% 12800 13.1941 96.70% 3.75% 0.1709 + 13 ffn_down 845.87 0.0263 8.8175 0.07 0.11 100.00% 12800 13.2368 97.02% 2.59% 0.2583 + 9 ffn_down 829.72 0.0127 11.8182 0.06 0.21 100.00% 12800 12.6312 92.58% 1.71% 0.1047 + 7 ffn_down 761.20 0.0088 17.7730 0.06 0.18 100.00% 12800 13.0089 95.35% 1.20% 0.0092 + 8 ffn_down 714.58 0.0115 9.3180 0.06 0.15 100.00% 12800 12.9415 94.85% 1.95% 0.1090 + 39 ffn_gate 73855.20 0.0000 27529.4883 18.03 432.89 99.98% 4096 7.6055 63.38% 0.27% 0.9793 + 38 ffn_gate 29146.70 0.0000 3154.5896 7.12 50.92 99.98% 4096 10.6959 89.13% 0.63% 0.9972 + 37 ffn_gate 23469.74 0.0000 1999.8401 5.73 32.33 100.00% 4096 11.0207 91.84% 0.66% 0.9944 + 36 ffn_gate 18879.52 0.0000 1587.3755 4.61 26.11 99.98% 4096 10.9542 91.28% 0.68% 0.9535 + 35 
ffn_gate 12609.52 0.0000 691.8613 3.08 13.18 99.95% 4096 10.9728 91.44% 1.10% 0.7708 + 34 ffn_gate 9369.36 0.0000 369.9386 2.29 8.82 99.98% 4096 10.8088 90.07% 1.39% 0.9703 + 33 ffn_gate 6623.49 0.0000 206.4922 1.62 5.04 99.98% 4096 11.0320 91.93% 1.56% 0.9570 + 32 ffn_gate 4946.19 0.2047 142.2780 1.21 3.44 100.00% 4096 11.1646 93.04% 1.49% 0.8361 + 0 ffn_gate 4647.99 0.0000 530.0613 1.13 10.15 68.41% 4096 8.6122 71.77% 1.15% 0.0000 + 31 ffn_gate 4116.51 0.0000 180.2167 1.01 3.74 99.98% 4096 11.0407 92.01% 1.32% 0.5779 + 1 ffn_gate 3766.61 0.0000 642.9065 0.92 11.02 99.22% 4096 9.7890 81.57% 0.12% 0.0923 + 30 ffn_gate 3736.23 0.0000 93.6410 0.91 2.60 100.00% 4096 11.2482 93.73% 1.29% 0.9941 + 29 ffn_gate 3591.15 0.0000 88.1915 0.88 2.41 99.98% 4096 11.3407 94.51% 1.10% 0.9684 + 25 ffn_gate 3420.11 0.0000 73.2920 0.83 2.34 99.98% 4096 11.3321 94.43% 0.85% 0.9818 + 12 ffn_gate 3417.91 0.0000 98.0158 0.83 1.89 99.95% 4096 11.6038 96.70% 0.49% 0.9870 + 26 ffn_gate 3406.55 0.0000 75.7852 0.83 2.13 99.98% 4096 11.4214 95.18% 0.88% 0.9927 + 13 ffn_gate 3312.10 0.0000 90.1448 0.81 1.61 99.98% 4096 11.6814 97.34% 0.51% 0.9759 + 11 ffn_gate 3301.55 0.0000 105.7369 0.81 2.04 99.95% 4096 11.5318 96.10% 0.44% 0.9904 + 28 ffn_gate 3275.83 0.0000 68.6310 0.80 1.93 99.98% 4096 11.4178 95.15% 1.22% 0.9578 + 27 ffn_gate 3236.11 0.0000 72.2997 0.79 1.87 99.98% 4096 11.4812 95.68% 1.03% 0.9950 + 14 ffn_gate 3080.71 0.0000 65.3723 0.75 1.33 99.98% 4096 11.6985 97.49% 0.71% 0.9506 + 16 ffn_gate 3016.78 0.0000 55.2134 0.74 1.58 99.98% 4096 11.5581 96.32% 0.76% 0.9874 + 24 ffn_gate 3010.61 0.0000 73.8905 0.74 2.21 100.00% 4096 11.2557 93.80% 0.93% 0.9690 + 22 ffn_gate 2879.76 0.0001 77.3697 0.70 2.02 100.00% 4096 11.3401 94.50% 0.81% 0.9957 + 17 ffn_gate 2870.70 0.0000 51.7692 0.70 1.48 99.98% 4096 11.5662 96.39% 0.73% 0.9959 + 23 ffn_gate 2843.65 0.0001 89.6189 0.69 2.21 100.00% 4096 11.2398 93.66% 0.95% 0.9937 + 10 ffn_gate 2833.02 0.0000 85.4598 0.69 1.74 99.90% 4096 11.5267 96.06% 
0.46% 0.8381 + 19 ffn_gate 2776.20 0.0000 70.3380 0.68 1.86 100.00% 4096 11.3917 94.93% 0.71% 0.9944 + 15 ffn_gate 2760.72 0.0000 57.4514 0.67 1.45 99.95% 4096 11.5659 96.38% 0.83% 0.9038 + 18 ffn_gate 2754.00 0.0001 61.1560 0.67 1.55 100.00% 4096 11.5243 96.04% 0.76% 0.9902 + 21 ffn_gate 2737.92 0.0000 70.9311 0.67 1.86 99.98% 4096 11.3741 94.78% 0.81% 0.9902 + 20 ffn_gate 2699.39 0.0000 65.5763 0.66 1.79 99.98% 4096 11.3889 94.91% 0.83% 0.9932 + 8 ffn_gate 2469.90 0.0000 103.2819 0.60 1.84 99.93% 4096 11.4803 95.67% 0.56% 0.9061 + 2 ffn_gate 2319.60 0.0000 630.8475 0.57 10.17 99.80% 4096 8.7421 72.85% 0.12% 0.0200 + 9 ffn_gate 2312.98 0.0000 70.3176 0.56 1.47 99.93% 4096 11.4845 95.70% 0.76% 0.9660 + 7 ffn_gate 2253.67 0.0000 58.9012 0.55 1.44 99.93% 4096 11.5263 96.05% 0.54% 0.9532 + 5 ffn_gate 2213.64 0.0000 330.8649 0.54 5.46 99.93% 4096 10.0780 83.98% 0.24% 0.2463 + 6 ffn_gate 1791.24 0.0000 42.9261 0.44 1.12 99.90% 4096 11.4775 95.65% 0.66% 0.2691 + 4 ffn_gate 1789.65 0.0000 112.6252 0.44 2.11 99.93% 4096 11.0733 92.28% 0.29% 0.9765 + 3 ffn_gate 1716.85 0.0000 175.6357 0.42 3.39 99.90% 4096 10.3538 86.28% 0.22% 0.8606 + 39 ffn_up 73855.20 0.0000 27529.4883 18.03 432.89 99.98% 4096 7.6055 63.38% 0.27% 0.9793 + 38 ffn_up 29146.70 0.0000 3154.5896 7.12 50.92 99.98% 4096 10.6959 89.13% 0.63% 0.9972 + 37 ffn_up 23469.74 0.0000 1999.8401 5.73 32.33 100.00% 4096 11.0207 91.84% 0.66% 0.9944 + 36 ffn_up 18879.52 0.0000 1587.3755 4.61 26.11 99.98% 4096 10.9542 91.28% 0.68% 0.9535 + 35 ffn_up 12609.52 0.0000 691.8613 3.08 13.18 99.95% 4096 10.9728 91.44% 1.10% 0.7708 + 34 ffn_up 9369.36 0.0000 369.9386 2.29 8.82 99.98% 4096 10.8088 90.07% 1.39% 0.9703 + 33 ffn_up 6623.49 0.0000 206.4922 1.62 5.04 99.98% 4096 11.0320 91.93% 1.56% 0.9570 + 32 ffn_up 4946.19 0.2047 142.2780 1.21 3.44 100.00% 4096 11.1646 93.04% 1.49% 0.8361 + 0 ffn_up 4647.99 0.0000 530.0613 1.13 10.15 68.41% 4096 8.6122 71.77% 1.15% 0.0000 + 31 ffn_up 4116.51 0.0000 180.2167 1.01 3.74 99.98% 4096 11.0407 
92.01% 1.32% 0.5779 + 1 ffn_up 3766.61 0.0000 642.9065 0.92 11.02 99.22% 4096 9.7890 81.57% 0.12% 0.0923 + 30 ffn_up 3736.23 0.0000 93.6410 0.91 2.60 100.00% 4096 11.2482 93.73% 1.29% 0.9941 + 29 ffn_up 3591.15 0.0000 88.1915 0.88 2.41 99.98% 4096 11.3407 94.51% 1.10% 0.9684 + 25 ffn_up 3420.11 0.0000 73.2920 0.83 2.34 99.98% 4096 11.3321 94.43% 0.85% 0.9818 + 12 ffn_up 3417.91 0.0000 98.0158 0.83 1.89 99.95% 4096 11.6038 96.70% 0.49% 0.9870 + 26 ffn_up 3406.55 0.0000 75.7852 0.83 2.13 99.98% 4096 11.4214 95.18% 0.88% 0.9927 + 13 ffn_up 3312.10 0.0000 90.1448 0.81 1.61 99.98% 4096 11.6814 97.34% 0.51% 0.9759 + 11 ffn_up 3301.55 0.0000 105.7369 0.81 2.04 99.95% 4096 11.5318 96.10% 0.44% 0.9904 + 28 ffn_up 3275.83 0.0000 68.6310 0.80 1.93 99.98% 4096 11.4178 95.15% 1.22% 0.9578 + 27 ffn_up 3236.11 0.0000 72.2997 0.79 1.87 99.98% 4096 11.4812 95.68% 1.03% 0.9950 + 14 ffn_up 3080.71 0.0000 65.3723 0.75 1.33 99.98% 4096 11.6985 97.49% 0.71% 0.9506 + 16 ffn_up 3016.78 0.0000 55.2134 0.74 1.58 99.98% 4096 11.5581 96.32% 0.76% 0.9874 + 24 ffn_up 3010.61 0.0000 73.8905 0.74 2.21 100.00% 4096 11.2557 93.80% 0.93% 0.9690 + 22 ffn_up 2879.76 0.0001 77.3697 0.70 2.02 100.00% 4096 11.3401 94.50% 0.81% 0.9957 + 17 ffn_up 2870.70 0.0000 51.7692 0.70 1.48 99.98% 4096 11.5662 96.39% 0.73% 0.9959 + 23 ffn_up 2843.65 0.0001 89.6189 0.69 2.21 100.00% 4096 11.2398 93.66% 0.95% 0.9937 + 10 ffn_up 2833.02 0.0000 85.4598 0.69 1.74 99.90% 4096 11.5267 96.06% 0.46% 0.8381 + 19 ffn_up 2776.20 0.0000 70.3380 0.68 1.86 100.00% 4096 11.3917 94.93% 0.71% 0.9944 + 15 ffn_up 2760.72 0.0000 57.4514 0.67 1.45 99.95% 4096 11.5659 96.38% 0.83% 0.9038 + 18 ffn_up 2754.00 0.0001 61.1560 0.67 1.55 100.00% 4096 11.5243 96.04% 0.76% 0.9902 + 21 ffn_up 2737.92 0.0000 70.9311 0.67 1.86 99.98% 4096 11.3741 94.78% 0.81% 0.9902 + 20 ffn_up 2699.39 0.0000 65.5763 0.66 1.79 99.98% 4096 11.3889 94.91% 0.83% 0.9932 + 8 ffn_up 2469.90 0.0000 103.2819 0.60 1.84 99.93% 4096 11.4803 95.67% 0.56% 0.9061 + 2 ffn_up 
2319.60 0.0000 630.8475 0.57 10.17 99.80% 4096 8.7421 72.85% 0.12% 0.0200 + 9 ffn_up 2312.98 0.0000 70.3176 0.56 1.47 99.93% 4096 11.4845 95.70% 0.76% 0.9660 + 7 ffn_up 2253.67 0.0000 58.9012 0.55 1.44 99.93% 4096 11.5263 96.05% 0.54% 0.9532 + 5 ffn_up 2213.64 0.0000 330.8649 0.54 5.46 99.93% 4096 10.0780 83.98% 0.24% 0.2463 + 6 ffn_up 1791.24 0.0000 42.9261 0.44 1.12 99.90% 4096 11.4775 95.65% 0.66% 0.2691 + 4 ffn_up 1789.65 0.0000 112.6252 0.44 2.11 99.93% 4096 11.0733 92.28% 0.29% 0.9765 + 3 ffn_up 1716.85 0.0000 175.6357 0.42 3.39 99.90% 4096 10.3538 86.28% 0.22% 0.8606 + +Computing weighted average statistics per layer (40 layers) + + Layer μΣ(Act²) μZD μCosSim +================================================ + 0 4139.83 0.7411% 0.0000 + 1 6791.54 2.9832% 0.1389 + 2 7785.62 1.9852% 0.1614 + 3 70900.05 1.2575% 0.5370 + 4 15985.68 1.4608% 0.4854 + 5 146183.94 0.8107% 0.4142 + 6 18205.81 1.3806% 0.4006 + 7 23167.60 1.2842% 0.5735 + 8 17900.25 1.4849% 0.5729 + 9 17128.10 1.5571% 0.6014 + 10 15332.18 2.2715% 0.5906 + 11 15983.94 1.9451% 0.6469 + 12 17056.90 2.1752% 0.6652 + 13 20767.65 2.4856% 0.6752 + 14 19054.13 1.1933% 0.5841 + 15 19172.65 1.4341% 0.5356 + 16 26136.96 1.8568% 0.5790 + 17 19724.82 2.5230% 0.6361 + 18 22839.35 2.4882% 0.6723 + 19 18405.42 1.6588% 0.6325 + 20 23508.35 1.5224% 0.6168 + 21 23910.19 1.5866% 0.5942 + 22 24533.16 1.1531% 0.5889 + 23 25862.99 1.2548% 0.5850 + 24 19961.15 1.3993% 0.5695 + 25 23721.45 0.7063% 0.5718 + 26 22758.83 0.8053% 0.5699 + 27 26575.09 1.3538% 0.5727 + 28 28424.57 1.0889% 0.5796 + 29 30363.12 1.5732% 0.5756 + 30 38224.65 1.7311% 0.5899 + 31 31137.59 1.3886% 0.5094 + 32 39814.40 1.8755% 0.5482 + 33 41853.15 1.9772% 0.5874 + 34 52107.42 2.3491% 0.5840 + 35 58491.29 1.8193% 0.5410 + 36 77988.16 1.1558% 0.5635 + 37 103052.64 1.1210% 0.5778 + 38 145584.28 1.5143% 0.5889 + 39 640944.31 1.3538% 0.5533