commit ae81f79c60e6bf829f31402b345ecaf2647147a6 Author: ModelHub XC Date: Sat Jun 6 08:04:16 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: gaianet/Llama-3_1-Nemotron-51B-Instruct-GGUF Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..65b8dd7 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,52 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q2_K.gguf filter=lfs diff=lfs merge=lfs -text 
+Llama-3_1-Nemotron-51B-Instruct-Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q5_0.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q8_0-00001-of-00002.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-Q8_0-00002-of-00002.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-f16-00001-of-00004.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-f16-00002-of-00004.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-f16-00003-of-00004.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3_1-Nemotron-51B-Instruct-f16-00004-of-00004.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q2_K.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q2_K.gguf new file mode 100644 index 0000000..b803511 --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f1d6ad4df6fa589194203235d19bb31315cd73beb3ed2dffb02a6eb1d404ac9 +size 19418642464 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q3_K_L.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q3_K_L.gguf new file mode 100644 index 0000000..beef18f --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q3_K_L.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:cbe29ae0b25627ac8631a34ca6f11d572ae4a0bd6c1f8650f960a0faf7fe69b0 +size 27349751840 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q3_K_M.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q3_K_M.gguf new file mode 100644 index 0000000..b880566 --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8595b234baa5fc3dc14db611793cedcdb3ab73bcc421bacc46a15a4ba6bb57b +size 25182345248 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q3_K_S.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q3_K_S.gguf new file mode 100644 index 0000000..4461d8c --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q3_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef2b0a9330eafc8fb6cde9285e7cd1c9b9a081e4ecd68f02fcb8e3b1459eaa68 +size 22652393504 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q4_0.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q4_0.gguf new file mode 100644 index 0000000..f0091d8 --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q4_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:624711249056e4dc79bda0090015f3ed0a44fe7ca35a2a91579efe90b92908aa +size 29252368416 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q4_K_M.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q4_K_M.gguf new file mode 100644 index 0000000..cdd46c7 --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db50b770b4cc7918f1a25f204609c4c478e41c2f10499ec900259cb052948c11 +size 31037306912 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q4_K_S.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q4_K_S.gguf new file mode 100644 index 0000000..c29bb19 --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q4_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:041d5660e8f40b1fe28c65c522e92814cb855426187aae81ca9359ca10e46df5 +size 29484496928 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q5_0.gguf 
b/Llama-3_1-Nemotron-51B-Instruct-Q5_0.gguf new file mode 100644 index 0000000..1e08c8c --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q5_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f6baa9e8ba28870f76093655dcf04b9a4439a8edf4b51efc4725c44684d1c24 +size 35558504480 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q5_K_M.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q5_K_M.gguf new file mode 100644 index 0000000..d0c4822 --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e19d5f2172cb51bcb3dd22391cebaf4c7f0210c562200ae671d8640e115de36 +size 36465391648 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q5_K_S.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q5_K_S.gguf new file mode 100644 index 0000000..a379d03 --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q5_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:906aefa4926bec89d3aff281db866bb02e4ed79792566b28cc3d6a8bebcb5c79 +size 35558504480 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q6_K.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q6_K.gguf new file mode 100644 index 0000000..cb09b6e --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d66d3daee89d9e2596841f3589b3268ef060aef1513070a5ce390ce00adf12f +size 42258774048 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q8_0-00001-of-00002.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q8_0-00001-of-00002.gguf new file mode 100644 index 0000000..8203c3a --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-Q8_0-00001-of-00002.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ed2a1ad93790fcfe630e5755f5ce18d9fd7a04808e36343f3b000bde61ffa55 +size 29826134528 diff --git a/Llama-3_1-Nemotron-51B-Instruct-Q8_0-00002-of-00002.gguf b/Llama-3_1-Nemotron-51B-Instruct-Q8_0-00002-of-00002.gguf new file mode 100644 index 0000000..d6e3474 --- /dev/null +++ 
b/Llama-3_1-Nemotron-51B-Instruct-Q8_0-00002-of-00002.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20fc51b96935e0e17daee28f9694ef85cc7f66faabf3ea0d2a5f126e958fbf9 +size 24905238240 diff --git a/Llama-3_1-Nemotron-51B-Instruct-f16-00001-of-00004.gguf b/Llama-3_1-Nemotron-51B-Instruct-f16-00001-of-00004.gguf new file mode 100644 index 0000000..5551187 --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-f16-00001-of-00004.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c29966a7c3b2172525e4b9b29de9391ac1507476e26ae09486fb0953501c5c47 +size 29772092192 diff --git a/Llama-3_1-Nemotron-51B-Instruct-f16-00002-of-00004.gguf b/Llama-3_1-Nemotron-51B-Instruct-f16-00002-of-00004.gguf new file mode 100644 index 0000000..26206c6 --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-f16-00002-of-00004.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d59882c28b27005f0551e3ece2e49c37a9096022f08b1cb228f8cefc09e75c +size 29562578048 diff --git a/Llama-3_1-Nemotron-51B-Instruct-f16-00003-of-00004.gguf b/Llama-3_1-Nemotron-51B-Instruct-f16-00003-of-00004.gguf new file mode 100644 index 0000000..7cdbb7e --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-f16-00003-of-00004.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:534c2dd1ee26a2fbf3979582b0fdafa9bd01e9d3f0eb7d78a506ab10f5bb0897 +size 29596988128 diff --git a/Llama-3_1-Nemotron-51B-Instruct-f16-00004-of-00004.gguf b/Llama-3_1-Nemotron-51B-Instruct-f16-00004-of-00004.gguf new file mode 100644 index 0000000..75b249d --- /dev/null +++ b/Llama-3_1-Nemotron-51B-Instruct-f16-00004-of-00004.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82558d958d66febc358278b3c96b4479e2c002dd4b7babe49cbf111cc05813f3 +size 14080741216 diff --git a/README.md b/README.md new file mode 100644 index 0000000..caf8ee0 --- /dev/null +++ b/README.md @@ -0,0 +1,42 @@ +--- +base_model: 
nvidia/Llama-3_1-Nemotron-51B-Instruct +license: other +license_name: nvidia-open-model-license +license_link: >- + https://developer.download.nvidia.com/licenses/nvidia-open-model-license-agreement-june-2024.pdf +model_creator: nvidia +model_name: Llama-3_1-Nemotron-51B-Instruct +quantized_by: Second State Inc. +library_name: transformers +pipeline_tag: text-generation +language: + - en +tags: + - nvidia + - llama-3 + - pytorch +--- + +# Llama-3_1-Nemotron-51B-Instruct-GGUF + +## Original Model + +[nvidia/Llama-3_1-Nemotron-51B-Instruct](https://huggingface.co/nvidia/Llama-3_1-Nemotron-51B-Instruct) + +## Run with GaiaNet + +**Prompt template:** + +prompt template: `llama-3-chat` + +**Context size:** + +chat_ctx_size: `8192` + +**Run with GaiaNet:** + +- Quick start: https://docs.gaianet.ai/node-guide/quick-start + +- Customize your node: https://docs.gaianet.ai/node-guide/customize + +*Quantized with llama.cpp b4381* \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..6b116db --- /dev/null +++ b/config.json @@ -0,0 +1,1004 @@ +{ + "architectures": [ + "DeciLMForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "auto_map": { + "AutoConfig": "configuration_decilm.DeciLMConfig", + "AutoModelForCausalLM": "modeling_decilm.DeciLMForCausalLM" + }, + "block_configs": [ + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 16, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, 
"replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 32, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 32, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 64, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 64, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 32, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 32, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 64, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 32, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + 
"replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 32, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + 
"replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": 
false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": 
true + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 2.625, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": true, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": true, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": true, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + 
"replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": true, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": true, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": true, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": true, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": true, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + 
"n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": null, + "no_op": false, + "replace_with_linear": true + }, + "ffn": { + "ffn_mult": 1.3125, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + 
"ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + }, + { + "attention": { + "n_heads_in_group": 8, + "no_op": false, + "replace_with_linear": false + }, + "ffn": { + "ffn_mult": 5.25, + "no_op": false, + "replace_with_linear": false + } + } + ], + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "hidden_act": "silu", + "hidden_size": 8192, + "initializer_range": 0.02, + "intermediate_size": null, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "nemotron-nas", + "num_attention_heads": 64, + "num_hidden_layers": 80, + "num_key_value_heads": null, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 8.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.44.2", + "use_cache": true, + "vocab_size": 128256 +}