commit eb24bf5930d848eed2c7369a5ab3c900530d5c35 Author: ModelHub XC Date: Mon Apr 13 01:04:58 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: hiratagoh/NVIDIA-Nemotron-Nano-9B-v2-Japanese-GGUF Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..b325672 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,44 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +NVIDIA-Nemotron-Nano-9B-v2-Japanese-BF16.gguf filter=lfs diff=lfs merge=lfs -text 
+NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +NVIDIA-Nemotron-Nano-9B-v2-Japanese-IQ4_XS.gguf filter=lfs diff=lfs merge=lfs -text +NVIDIA-Nemotron-Nano-9B-v2-Japanese-BF16.imatrix.gguf filter=lfs diff=lfs merge=lfs -text +NVIDIA-Nemotron-Nano-9B-v2-Japanese-BF16.imatrix filter=lfs diff=lfs merge=lfs -text +NVIDIA-Nemotron-Nano-9B-v2-Japanese-IQ4_NL.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/NVIDIA-Nemotron-Nano-9B-v2-Japanese-BF16.gguf b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-BF16.gguf new file mode 100644 index 0000000..919f386 --- /dev/null +++ b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-BF16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c030803157ee58834f4310f8c7c4bb02ecc46cbec742ff394331d5d8406b3bf +size 17788743808 diff --git a/NVIDIA-Nemotron-Nano-9B-v2-Japanese-BF16.imatrix b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-BF16.imatrix new file mode 100644 index 0000000..33e74a9 --- /dev/null +++ b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-BF16.imatrix @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1881531ebe0172fe827cdf408be1d628268f57af5b595a9bd24d4bcb42f921d1 +size 3921248 diff --git a/NVIDIA-Nemotron-Nano-9B-v2-Japanese-IQ4_NL.gguf b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-IQ4_NL.gguf new file mode 100644 index 0000000..89ba733 --- /dev/null +++ b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-IQ4_NL.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:005b64b4df48f916a4820ddf33f97aa9834d444f987254a36d2c279b37e3cbca +size 5308682688 diff --git a/NVIDIA-Nemotron-Nano-9B-v2-Japanese-IQ4_XS.gguf b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-IQ4_XS.gguf new file mode 100644 index 0000000..fd7b642 --- /dev/null +++ 
b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-IQ4_XS.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0e724bb1bbe12781a8bcc87973098d4586302f36e33be5255da9e4f95688d7b9 +size 5267108288 diff --git a/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q4_K_M.gguf b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q4_K_M.gguf new file mode 100644 index 0000000..3339e32 --- /dev/null +++ b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f275d0189b7d25367d384fcaa80cd45f1c6067ee6fbb5cead642e513eed23d +size 6525629568 diff --git a/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q5_K_M.gguf b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q5_K_M.gguf new file mode 100644 index 0000000..48d06c6 --- /dev/null +++ b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e09878c3c3afca3696d7bc1e0b6f5b2b0db518e703670f15626aa28240759b1 +size 7069806208 diff --git a/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q6_K.gguf b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q6_K.gguf new file mode 100644 index 0000000..66aad14 --- /dev/null +++ b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c18dd454dc3ef6ef26c7adadc3773ffb0ea8bd17b4d5a309782fe71a009e08c +size 9135892608 diff --git a/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q8_0.gguf b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q8_0.gguf new file mode 100644 index 0000000..94d907c --- /dev/null +++ b/NVIDIA-Nemotron-Nano-9B-v2-Japanese-Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c98ba444a2b11331891e0f5d4d5b257784d00eed594b613903ca3d526ef6cc52 +size 9458094208 diff --git a/README.md b/README.md new file mode 100644 index 0000000..a54fac8 --- /dev/null +++ b/README.md @@ -0,0 +1,37 @@ +--- +license: other +license_name: nvidia-nemotron-open-model-license +license_link: >- + 
https://www.nvidia.com/en-us/agreements/enterprise-software/nvidia-nemotron-open-model-license/ +base_model: nvidia/NVIDIA-Nemotron-Nano-9B-v2-Japanese +datasets: +- TFMC/imatrix-dataset-for-japanese-llm +track_downloads: true +language: +- ja +- en +pipeline_tag: text-generation +--- + +# NVIDIA-Nemotron-Nano-9B-v2-Japanese-GGUF + +## GGUF変換と量子化 + +[nvidia/NVIDIA-Nemotron-Nano-9B-v2-Japanese](https://huggingface.co/nvidia/NVIDIA-Nemotron-Nano-9B-v2-Japanese)を +[llama.cpp](https://github.com/ggml-org/llama.cpp.git)の`convert_hf_to_gguf.py`でGGUF形式に変換し、`llama-quantize`で量子化しました。 + +元モデルが軽量ですので、実行環境が許せばBF16かQ8_0での利用をお勧めします。 + +## iMatrix生成 + +iMatrixは +[TFMC/imatrix-dataset-for-japanese-llm](https://huggingface.co/datasets/TFMC/imatrix-dataset-for-japanese-llm/tree/main) +の`c4_en_ja_imatrix.txt`を教師データに使用し`llama-imatrix`で生成しました。 + +## IQ4_XS量子化 + +**IQ4_XS量子化**では`llama-quantize`で +``` +llama_model_quantize_impl : tensor cols 4480 x 131072 are not divisible by 256, required for iq4_xs - using fallback quantization iq4_nl +``` +などとログ出力され、**4ビット量子化されたLayerの多くはIQ4_NL**になっています。表面上はIQ4_XSと表記していますが、中身はほぼIQ4_NLです。