commit d6f697f641872b85be857a40e5e5aa05bb302663 Author: ModelHub XC Date: Thu Apr 30 14:50:47 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: CelesteImperia/Llama-3.2-1B-Instruct-Platinum-GGUF Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..0523a90 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,40 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +Llama-3.2-1B-Instruct-Platinum-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text 
+Llama-3.2-1B-Instruct-Platinum-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3.2-1B-Instruct-Platinum-F16.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3.2-1B-Instruct-Platinum-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +Llama-3.2-1B-Instruct-Platinum-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..bd18cce --- /dev/null +++ b/LICENSE @@ -0,0 +1,6 @@ +Llama 3.2 Community License Agreement + +This model is subject to the Llama 3.2 Community License. +The full license agreement, acceptable use policy, and +redistribution terms are available at: +https://github.com/meta-llama/llama-models/blob/main/models/llama3_2/LICENSE \ No newline at end of file diff --git a/Llama-3.2-1B-Instruct-Platinum-F16.gguf b/Llama-3.2-1B-Instruct-Platinum-F16.gguf new file mode 100644 index 0000000..dd71ed9 --- /dev/null +++ b/Llama-3.2-1B-Instruct-Platinum-F16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8fb605e9c67eca89fedc1fe3572281b41ee2afebb11f68f353b154ed22a06dda +size 2479595520 diff --git a/Llama-3.2-1B-Instruct-Platinum-Q4_K_M.gguf b/Llama-3.2-1B-Instruct-Platinum-Q4_K_M.gguf new file mode 100644 index 0000000..5686f34 --- /dev/null +++ b/Llama-3.2-1B-Instruct-Platinum-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2f8f94e545892de7b52e04e5034b27d0a3b2f3b40e0b2d0f7fd2be32ee822b2 +size 807694336 diff --git a/Llama-3.2-1B-Instruct-Platinum-Q5_K_M.gguf b/Llama-3.2-1B-Instruct-Platinum-Q5_K_M.gguf new file mode 100644 index 0000000..7e2a108 --- /dev/null +++ b/Llama-3.2-1B-Instruct-Platinum-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbfa4ef7162664517cc1eaf697b90c7c2dc6ebc00a84ea347b541c1dbe777a02 +size 911503360 diff --git a/Llama-3.2-1B-Instruct-Platinum-Q6_K.gguf b/Llama-3.2-1B-Instruct-Platinum-Q6_K.gguf new file mode 100644 index 0000000..54be132 --- /dev/null +++ 
b/Llama-3.2-1B-Instruct-Platinum-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80920784774a16b943865c26c0d46a2b3b0349295bcd7c01eea7c3f1f07e1446 +size 1021800448 diff --git a/Llama-3.2-1B-Instruct-Platinum-Q8_0.gguf b/Llama-3.2-1B-Instruct-Platinum-Q8_0.gguf new file mode 100644 index 0000000..7926f37 --- /dev/null +++ b/Llama-3.2-1B-Instruct-Platinum-Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b788f47b1acfd0d62a931e211cf5a8f332d0b6bc863d33685503230e884ba62 +size 1321082880 diff --git a/README.md b/README.md new file mode 100644 index 0000000..e4ade6b --- /dev/null +++ b/README.md @@ -0,0 +1,94 @@ +--- +base_model: meta-llama/Llama-3.2-1B-Instruct +library_name: gguf +pipeline_tag: text-generation +license: llama3.2 +tags: +- gguf +- llama-cpp +- llama-3.2 +- celeste-imperia +--- + +# Llama-3.2-1B-Instruct-GGUF (Platinum Series) + +![Status](https://img.shields.io/badge/Status-Active-success) +![Format](https://img.shields.io/badge/Format-GGUF-green) +![Series](https://img.shields.io/badge/Series-Platinum-silver) +[![Support](https://img.shields.io/badge/Support-Razorpay-orange)](https://razorpay.me/@huggingface) + +This repository contains the **Platinum Series** universal GGUF release of **Llama-3.2-1B-Instruct**. This collection provides multiple quantization levels optimized for cross-platform performance, from mobile devices to high-VRAM workstations. 
+ +## 📦 Available Files & Quantization Details + +| File Name | Quantization | Size | Accuracy | Recommended For | +| :--- | :--- | :--- | :--- | :--- | +| **Llama-3.2-1B-Instruct-Platinum-F16.gguf** | FP16 | ~2.5 GB | 100% | Master Reference / Benchmarking | +| **Llama-3.2-1B-Instruct-Platinum-Q8_0.gguf** | Q8_0 | ~1.3 GB | 99.9% | Platinum Reference / High-Fidelity | +| **Llama-3.2-1B-Instruct-Platinum-Q6_K.gguf** | Q6_K | ~1.0 GB | 99.7% | High-Quality Inference | +| **Llama-3.2-1B-Instruct-Platinum-Q5_K_M.gguf** | Q5_K_M | ~0.9 GB | 99.2% | Balanced Desktop Performance | +| **Llama-3.2-1B-Instruct-Platinum-Q4_K_M.gguf** | Q4_K_M | ~0.8 GB | 98.5% | Mobile / Low-Power Efficiency | + +--- + +## 🐍 Python Inference (llama-cpp-python) + +To run these engines using Python: + +```python +from llama_cpp import Llama + +llm = Llama( + model_path="Llama-3.2-1B-Instruct-Platinum-Q8_0.gguf", + n_gpu_layers=-1, # Target all layers to NVIDIA/Apple GPU + n_ctx=4096 +) + +output = llm("Explain the difference between a class and a struct in C#.", max_tokens=150) +print(output["choices"][0]["text"]) +``` + +--- + +## 💻 For C# / .NET Users (LLamaSharp) + +This collection is fully compatible with .NET applications via the ``LLamaSharp`` library. 
+ +```csharp +using LLama.Common; +using LLama; + +var parameters = new ModelParams("Llama-3.2-1B-Instruct-Platinum-Q8_0.gguf") { + ContextSize = 4096, + GpuLayerCount = 35 +}; + +using var model = LLamaWeights.LoadFromFile(parameters); +using var context = model.CreateContext(parameters); +var executor = new InteractiveExecutor(context); + +Console.WriteLine("Universal Engine Active."); +``` + +--- + +## 🏗️ Technical Details +- **Optimization Tool:** llama.cpp (CUDA-accelerated) +- **Architecture:** Llama 3.2 (1B) +- **Hardware Validation:** Dual-GPU (RTX 3090 + RTX A4000) + +--- + +### ☕ Support the Forge + +| Platform | Support Link | +| :--- | :--- | +| **Global & India** | [Support via Razorpay](https://razorpay.me/@huggingface) | + +**Scan to support via UPI (India Only):** + + + +--- + +**Connect with the architect:** [Abhishek Jaiswal on LinkedIn](https://www.linkedin.com/in/abhishek-jaiswal-524056a/) \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..86c375f --- /dev/null +++ b/requirements.txt @@ -0,0 +1,4 @@ +optimum-intel[openvino,nncf]>=1.20.0 +transformers>=4.45.0 +accelerate +sentencepiece \ No newline at end of file