From 95591f5eac6e4b5f6411eb99875370f97cc910c5 Mon Sep 17 00:00:00 2001
From: ModelHub XC
Date: Mon, 20 Apr 2026 11:16:20 +0800
Subject: [PATCH] Initialize project; model provided by the ModelHub XC community
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Model: Mungert/sarvam-translate-GGUF
Source: Original Platform
---
 .gitattributes | 85 ++++++++
 README.md | 354 ++++++++++++++++++++++++++++++++
 sarvam-translate-bf16.gguf | 3 +
 sarvam-translate-bf16.mmproj | 3 +
 sarvam-translate-bf16_q8_0.gguf | 3 +
 sarvam-translate-f16.mmproj | 3 +
 sarvam-translate-f16_q8_0.gguf | 3 +
 sarvam-translate-f32.mmproj | 3 +
 sarvam-translate-iq1_m.gguf | 3 +
 sarvam-translate-iq1_s.gguf | 3 +
 sarvam-translate-iq2_m.gguf | 3 +
 sarvam-translate-iq2_s.gguf | 3 +
 sarvam-translate-iq2_xs.gguf | 3 +
 sarvam-translate-iq2_xxs.gguf | 3 +
 sarvam-translate-iq3_m.gguf | 3 +
 sarvam-translate-iq3_s.gguf | 3 +
 sarvam-translate-iq3_xs.gguf | 3 +
 sarvam-translate-iq3_xxs.gguf | 3 +
 sarvam-translate-iq4_nl.gguf | 3 +
 sarvam-translate-iq4_xs.gguf | 3 +
 sarvam-translate-q2_k_m.gguf | 3 +
 sarvam-translate-q2_k_s.gguf | 3 +
 sarvam-translate-q3_k_m.gguf | 3 +
 sarvam-translate-q3_k_s.gguf | 3 +
 sarvam-translate-q4_0.gguf | 3 +
 sarvam-translate-q4_1.gguf | 3 +
 sarvam-translate-q4_k_m.gguf | 3 +
 sarvam-translate-q4_k_s.gguf | 3 +
 sarvam-translate-q5_0.gguf | 3 +
 sarvam-translate-q5_1.gguf | 3 +
 sarvam-translate-q5_k_m.gguf | 3 +
 sarvam-translate-q5_k_s.gguf | 3 +
 sarvam-translate-q6_k_m.gguf | 3 +
 sarvam-translate-q8_0.gguf | 3 +
 sarvam-translate-q8_0.mmproj | 3 +
 sarvam-translate-tq1_0.gguf | 3 +
 sarvam-translate-tq2_0.gguf | 3 +
 sarvam-translate.imatrix | 3 +
 38 files changed, 547 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 README.md
 create mode 100644 sarvam-translate-bf16.gguf
 create mode 100644 sarvam-translate-bf16.mmproj
 create mode 100644 sarvam-translate-bf16_q8_0.gguf
 create mode 100644 sarvam-translate-f16.mmproj
 create mode 100644 sarvam-translate-f16_q8_0.gguf
 create mode 100644 sarvam-translate-f32.mmproj
 create mode 100644 sarvam-translate-iq1_m.gguf
 create mode 100644 sarvam-translate-iq1_s.gguf
 create mode 100644 sarvam-translate-iq2_m.gguf
 create mode 100644 sarvam-translate-iq2_s.gguf
 create mode 100644 sarvam-translate-iq2_xs.gguf
 create mode 100644 sarvam-translate-iq2_xxs.gguf
 create mode 100644 sarvam-translate-iq3_m.gguf
 create mode 100644 sarvam-translate-iq3_s.gguf
 create mode 100644 sarvam-translate-iq3_xs.gguf
 create mode 100644 sarvam-translate-iq3_xxs.gguf
 create mode 100644 sarvam-translate-iq4_nl.gguf
 create mode 100644 sarvam-translate-iq4_xs.gguf
 create mode 100644 sarvam-translate-q2_k_m.gguf
 create mode 100644 sarvam-translate-q2_k_s.gguf
 create mode 100644 sarvam-translate-q3_k_m.gguf
 create mode 100644 sarvam-translate-q3_k_s.gguf
 create mode 100644 sarvam-translate-q4_0.gguf
 create mode 100644 sarvam-translate-q4_1.gguf
 create mode 100644 sarvam-translate-q4_k_m.gguf
 create mode 100644 sarvam-translate-q4_k_s.gguf
 create mode 100644 sarvam-translate-q5_0.gguf
 create mode 100644 sarvam-translate-q5_1.gguf
 create mode 100644 sarvam-translate-q5_k_m.gguf
 create mode 100644 sarvam-translate-q5_k_s.gguf
 create mode 100644 sarvam-translate-q6_k_m.gguf
 create mode 100644 sarvam-translate-q8_0.gguf
 create mode 100644 sarvam-translate-q8_0.mmproj
 create mode 100644 sarvam-translate-tq1_0.gguf
 create mode 100644 sarvam-translate-tq2_0.gguf
create mode 100644 sarvam-translate.imatrix diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..2f35abb --- /dev/null +++ b/.gitattributes @@ -0,0 +1,85 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +sarvam-translate-f16.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-f16_q8_0.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-bf16_q8_0.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-f16_q6_k.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-bf16_q6_k.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-f16_q4_k.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-bf16_q4_k.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q2_k_l.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q3_k_l.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q4_k_l.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q5_k_l.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q6_k_l.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q2_k_m.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q2_k_s.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q3_k_m.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q3_k_s.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q4_k_m.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q4_k_s.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q5_k_m.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q5_k_s.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q6_k_m.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q8_0.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q4_0.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q4_1.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q4_0_l.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q4_1_l.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q5_0.gguf filter=lfs diff=lfs merge=lfs -text +sarvam-translate-q5_1.gguf 
filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-q5_0_l.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-q5_1_l.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq1_s.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq1_m.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq2_xs.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq2_xxs.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq2_s.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq2_m.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq3_xs.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq3_xxs.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq3_s.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq3_m.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq4_xs.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-iq4_nl.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-tq1_0.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-tq2_0.gguf filter=lfs diff=lfs merge=lfs -text
+sarvam-translate.imatrix filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-f32.mmproj filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-q8_0.mmproj filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-f16.mmproj filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-bf16.mmproj filter=lfs diff=lfs merge=lfs -text
+sarvam-translate-bf16.gguf filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1371fc1
--- /dev/null
+++ b/README.md
@@ -0,0 +1,354 @@
+---
+library_name: transformers
+license: gpl-3.0
+language:
+- as
+- bn
+- brx
+- doi
+- gom
+- gu
+- en
+- hi
+- kn
+- ks
+- mai
+- ml
+- mni
+- mr
+- ne
+- or
+- pa
+- sa
+- sat
+- sd
+- ta
+- te
+- ur
+base_model:
+- google/gemma-3-4b-it
+base_model_relation: finetune
+pipeline_tag: translation
+---
+
+# sarvam-translate GGUF Models
+
+## Model Generation Details
+
+This model was generated using [llama.cpp](https://github.com/ggerganov/llama.cpp) at commit [`1f63e75f`](https://github.com/ggerganov/llama.cpp/commit/1f63e75f3b5dc7f44dbe63c8a41d23958fe95bc0).
+
+## Quantization beyond the IMatrix
+
+I am testing a new quantization method that uses rules to bump important layers above what the standard imatrix would use.
+
+I have found that the standard imatrix does not perform very well at low-bit quantization or for MoE models, so I am using the llama.cpp `--tensor-type` option to bump up selected layers. See [Layer bumping with llama.cpp](https://github.com/Mungert69/GGUFModelBuilder/blob/main/model-converter/tensor_list_builder.py).
+
+This does create larger model files but increases precision for a given model size.
+
+### **Please provide feedback on how you find this method performs**
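+
+Below is a minimal sketch of what such a layer-bumped quantization call can look like with llama.cpp's `llama-quantize`. The tensor-name patterns and target types here are illustrative assumptions, not the exact rules used to build these files; check `llama-quantize --help` for the syntax your build supports.
+
+```bash
+# Hypothetical example: requantize to IQ3_M while bumping the attention V
+# and FFN down-projection tensors to a higher-precision type.
+./llama-quantize \
+  --imatrix sarvam-translate.imatrix \
+  --tensor-type attn_v=q5_k \
+  --tensor-type ffn_down=q5_k \
+  sarvam-translate-bf16.gguf sarvam-translate-iq3_m.gguf iq3_m
+```
+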
+## **Choosing the Right Model Format**
+
+Selecting the correct model format depends on your **hardware capabilities** and **memory constraints**.
+
+### **BF16 (Brain Float 16) – Use if BF16 acceleration is available**
+- A 16-bit floating-point format designed for **faster computation** while retaining good precision.
+- Provides a **dynamic range similar to FP32** with **lower memory usage**.
+- Recommended if your hardware supports **BF16 acceleration** (check your device's specs).
+- Ideal for **high-performance inference** with a **reduced memory footprint** compared to FP32.
+
+📌 **Use BF16 if:**
+✔ Your hardware has native **BF16 support** (e.g., newer GPUs, TPUs).
+✔ You want **higher precision** while saving memory.
+✔ You plan to **requantize** the model into another format.
+
+📌 **Avoid BF16 if:**
+❌ Your hardware does **not** support BF16 (it may fall back to FP32 and run slower).
+❌ You need compatibility with older devices that lack BF16 optimization.
+
+---
+
+### **F16 (Float 16) – More widely supported than BF16**
+- A 16-bit floating-point format with **high precision** but a smaller range of values than BF16.
+- Works on most devices with **FP16 acceleration support** (including many GPUs and some CPUs).
+- Slightly lower numerical precision than BF16 but generally sufficient for inference.
+
+📌 **Use F16 if:**
+✔ Your hardware supports **FP16** but **not BF16**.
+✔ You need a **balance between speed, memory usage, and accuracy**.
+✔ You are running on a **GPU** or another device optimized for FP16 computations.
+
+📌 **Avoid F16 if:**
+❌ Your device lacks **native FP16 support** (it may run slower than expected).
+❌ You have tight memory limitations.
+
+---
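+
+If you are unsure whether your GPU accelerates BF16 or FP16, a quick probe such as the sketch below can help. It is an illustrative check only: it assumes a CUDA build of PyTorch and covers NVIDIA GPUs, while other backends expose different capability queries.
+
+```python
+import torch
+
+if torch.cuda.is_available():
+    # Native BF16 support (e.g., Ampere and newer NVIDIA GPUs).
+    print("BF16 supported:", torch.cuda.is_bf16_supported())
+    # As a rough rule, FP16 is well accelerated from compute capability 6.x up.
+    major, minor = torch.cuda.get_device_capability()
+    print("FP16 likely supported:", major >= 6)
+else:
+    print("No CUDA device found; prefer the quantized formats for CPU inference.")
+```
+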
+### **Hybrid Precision Models (e.g., `bf16_q8_0`, `f16_q4_K`) – Best of Both Worlds**
+These formats selectively **quantize non-essential layers** while keeping **key layers in full precision** (e.g., attention and output layers).
+
+- Named like `bf16_q8_0` (meaning **full-precision BF16 core layers + quantized Q8_0 other layers**).
+- Strike a **balance between memory efficiency and accuracy**, improving over fully quantized models without requiring the full memory of BF16/F16.
+
+📌 **Use Hybrid Models if:**
+✔ You need **better accuracy than quant-only models** but can't afford full BF16/F16 everywhere.
+✔ Your device supports **mixed-precision inference**.
+✔ You want to **optimize trade-offs** for production-grade models on constrained hardware.
+
+📌 **Avoid Hybrid Models if:**
+❌ Your target device doesn't support **mixed or full-precision acceleration**.
+❌ You are operating under **ultra-strict memory limits** (in which case use fully quantized formats).
+
+---
+
+### **Quantized Models (Q4_K, Q6_K, Q8, etc.) – For CPU & Low-VRAM Inference**
+Quantization reduces model size and memory usage while maintaining as much accuracy as possible.
+- **Lower-bit models (Q4_K)** → **Best for minimal memory usage**, may have lower precision.
+- **Higher-bit models (Q6_K, Q8_0)** → **Better accuracy**, require more memory.
+
+📌 **Use Quantized Models if:**
+✔ You are running inference on a **CPU** and need an optimized model.
+✔ Your device has **low VRAM** and cannot load full-precision models.
+✔ You want to reduce the **memory footprint** while keeping reasonable accuracy.
+
+📌 **Avoid Quantized Models if:**
+❌ You need **maximum accuracy** (full-precision models are better for this).
+❌ Your hardware has enough VRAM for higher-precision formats (BF16/F16).
+
+---
+
+### **Very Low-Bit Quantization (IQ3_XS, IQ3_S, IQ3_M, Q4_K, Q4_0)**
+These models are optimized for **very high memory efficiency**, making them ideal for **low-power devices** or **large-scale deployments** where memory is a critical constraint.
+
+- **IQ3_XS**: Ultra-low-bit quantization (3-bit) with **very high memory efficiency**.
+  - **Use case**: Best for **ultra-low-memory devices** where even Q4_K is too large.
+  - **Trade-off**: Lower accuracy compared to higher-bit quantizations.
+
+- **IQ3_S**: Small block size for **maximum memory efficiency**.
+  - **Use case**: Best for **low-memory devices** where **IQ3_XS** is too aggressive.
+
+- **IQ3_M**: Medium block size for better accuracy than **IQ3_S**.
+  - **Use case**: Suitable for **low-memory devices** where **IQ3_S** is too limiting.
+
+- **Q4_K**: 4-bit quantization with **block-wise optimization** for better accuracy.
+  - **Use case**: Best for **low-memory devices** where **Q6_K** is too large.
+
+- **Q4_0**: Pure 4-bit quantization, optimized for **ARM devices**.
+  - **Use case**: Best for **ARM-based devices** or **low-memory environments**.
+
+### **Ultra Low-Bit Quantization (IQ1_S, IQ1_M, IQ2_S, IQ2_M, IQ2_XS, IQ2_XXS)**
+- Ultra-low-bit quantization (1–2 bit) with **extreme memory efficiency**.
+  - **Use case**: Best for cases where you have to fit the model into very constrained memory.
+  - **Trade-off**: Very low accuracy. May not function as expected. Please test fully before using.
+
+---
+
+### **Summary Table: Model Format Selection**
+
+| Model Format | Precision | Memory Usage | Device Requirements | Best Use Case |
+|--------------|-----------|--------------|---------------------|---------------|
+| **BF16** | Very High | High | BF16-supported GPU/CPU | High-speed inference with reduced memory |
+| **F16** | High | High | FP16-supported GPU/CPU | Inference when BF16 isn't available |
+| **Q4_K** | Medium-Low | Low | CPU or low-VRAM devices | Memory-constrained inference |
+| **Q6_K** | Medium | Moderate | CPU with more memory | Better accuracy with quantization |
+| **Q8_0** | High | Moderate | GPU/CPU with moderate VRAM | Highest accuracy among quantized models |
+| **IQ3_XS** | Low | Very Low | Ultra-low-memory devices | Max memory efficiency, low accuracy |
+| **IQ3_S** | Low | Very Low | Low-memory devices | Slightly more usable than IQ3_XS |
+| **IQ3_M** | Low-Medium | Low | Low-memory devices | Better accuracy than IQ3_S |
+| **Q4_0** | Low | Low | ARM-based/embedded devices | llama.cpp automatically optimizes for ARM inference |
+| **Ultra Low-Bit (IQ1/2_*)** | Very Low | Extremely Low | Tiny edge/embedded devices | Fit models in extremely tight memory; low accuracy |
+| **Hybrid (e.g., `bf16_q8_0`)** | Medium–High | Medium | Mixed-precision capable hardware | Balanced performance and memory, near-FP accuracy in critical layers |
+
+---
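+
+Once you have picked a format, a single llama.cpp call is a quick way to smoke-test the file. The sketch below is illustrative rather than definitive: flag names vary between llama.cpp versions, and the file name assumes the Q4_K_M quant from this repo.
+
+```bash
+# Load the Q4_K_M quant and translate one sentence to Hindi.
+./llama-cli -m sarvam-translate-q4_k_m.gguf \
+  --temp 0.01 -n 256 \
+  -sys "Translate the text below to Hindi." \
+  -p "Be the change you wish to see in the world."
+```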

+# Sarvam-Translate
+
+Try on Sarvam Playground

+Sarvam-Translate is an advanced translation model from Sarvam AI, built on Gemma3-4B-IT and designed specifically for comprehensive, document-level translation across the 22 official Indian languages. It addresses modern translation needs by moving beyond isolated sentences to handle long-context inputs, diverse content types, and various formats. Sarvam-Translate aims to provide high-quality, contextually aware translations for Indian languages, which have traditionally lagged behind high-resource languages in LLM performance.
+
+Learn more about Sarvam-Translate in our detailed [blog post](https://www.sarvam.ai/blogs/sarvam-translate).
+
+## Key Features
+- **Comprehensive Indian Language Support**: Focus on the 22 official Indian languages, ensuring nuanced and accurate translations.
+- **Advanced Document-Level Translation**: Translates entire documents, web pages, speeches, textbooks, and scientific articles, not just isolated sentences.
+- **Versatile Format Handling**: Processes a wide array of input formats, including markdown, digitized content (handling OCR errors), documents with embedded math and chemistry equations, and code files (translating only comments).
+- **Context-Aware & Inclusive**: Engineered to respect different contexts, formats, styles (formal/informal), and ensure inclusivity (e.g., appropriate gender attribution).
+
+## Supported languages
+
+`Assamese`, `Bengali`, `Bodo`, `Dogri`, `Gujarati`, `English`, `Hindi`, `Kannada`, `Kashmiri`, `Konkani`, `Maithili`, `Malayalam`, `Manipuri`, `Marathi`, `Nepali`, `Odia`, `Punjabi`, `Sanskrit`, `Santali`, `Sindhi`, `Tamil`, `Telugu`, `Urdu`
+
+## Quickstart
+The following code snippet demonstrates how to use Sarvam-Translate with Hugging Face Transformers.
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model_name = "sarvamai/sarvam-translate"
+
+# Load tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(model_name).to('cuda:0')
+
+# Translation task
+tgt_lang = "Hindi"
+input_txt = "Be the change you wish to see in the world."
+
+# Chat-style message prompt
+messages = [
+    {"role": "system", "content": f"Translate the text below to {tgt_lang}."},
+    {"role": "user", "content": input_txt}
+]
+
+# Apply chat template to structure the conversation
+text = tokenizer.apply_chat_template(
+    messages,
+    tokenize=False,
+    add_generation_prompt=True
+)
+
+# Tokenize and move input to model device
+model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
+
+# Generate the output
+generated_ids = model.generate(
+    **model_inputs,
+    max_new_tokens=1024,
+    do_sample=True,
+    temperature=0.01,
+    num_return_sequences=1
+)
+output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
+output_text = tokenizer.decode(output_ids, skip_special_tokens=True)
+
+print("Input:", input_txt)
+print("Translation:", output_text)
+```
+
+## vLLM Deployment
+
+### Server
+```bash
+vllm serve sarvamai/sarvam-translate --port 8000 --dtype bfloat16
+```
+
+### Client
+```python
+from openai import OpenAI
+
+# Modify OpenAI's API key and API base to use vLLM's API server.
+openai_api_key = "EMPTY"
+openai_api_base = "http://localhost:8000/v1"
+
+client = OpenAI(
+    api_key=openai_api_key,
+    base_url=openai_api_base,
+)
+
+models = client.models.list()
+model = models.data[0].id
+
+tgt_lang = 'Hindi'
+input_txt = 'Be the change you wish to see in the world.'
+messages = [
+    {"role": "system", "content": f"Translate the text below to {tgt_lang}."},
+    {"role": "user", "content": input_txt}
+]
+
+response = client.chat.completions.create(model=model, messages=messages, temperature=0.01)
+output_text = response.choices[0].message.content
+
+print("Input:", input_txt)
+print("Translation:", output_text)
+```
+
+## With Sarvam APIs
+
+Refer to our [Python client documentation](https://pypi.org/project/sarvamai/).
+
+Sample code:
+
+```python
+from sarvamai import SarvamAI
+
+client = SarvamAI()
+response = client.text.translate(
+    input="Be the change you wish to see in the world.",
+    source_language_code="en-IN",
+    target_language_code="hi-IN",
+    speaker_gender="Male",
+    model="sarvam-translate:v1",
+)
+```
+
+# 🚀 If you find these models useful
+
+Help me test my **AI-Powered Quantum Network Monitor Assistant** with **quantum-ready security checks**:
+
+👉 [Quantum Network Monitor](https://readyforquantum.com/?assistant=open&utm_source=huggingface&utm_medium=referral&utm_campaign=huggingface_repo_readme)
+
+The full open-source code for the Quantum Network Monitor service is available in my GitHub repos (repos with NetworkMonitor in the name): [Source Code Quantum Network Monitor](https://github.com/Mungert69). You will also find the code I use to quantize the models, if you want to do it yourself, in [GGUFModelBuilder](https://github.com/Mungert69/GGUFModelBuilder).
+
+💬 **How to test**:
+Choose an **AI assistant type**:
+- `TurboLLM` (GPT-4.1-mini)
+- `HugLLM` (Hugging Face open-source models)
+- `TestLLM` (Experimental CPU-only)
+
+### **What I'm Testing**
+I'm pushing the limits of **small open-source models for AI network monitoring**, specifically:
+- **Function calling** against live network services
+- **How small can a model go** while still handling:
+  - Automated **Nmap security scans**
+  - **Quantum-readiness checks**
+  - **Network monitoring tasks**
+
+🟡 **TestLLM** – Current experimental model (llama.cpp on 2 CPU threads on a Hugging Face Docker space):
+- ✅ **Zero-configuration setup**
+- ⏳ ~30s load time (slow inference but **no API costs**). No token limit, since the cost is low.
+- 🔧 **Help wanted!** If you're into **edge-device AI**, let's collaborate!
+
+### **Other Assistants**
+🟢 **TurboLLM** – Uses **gpt-4.1-mini**:
+- It performs very well, but unfortunately OpenAI charges per token, so token usage is limited.
+- **Create custom cmd processors to run .NET code on Quantum Network Monitor Agents**
+- **Real-time network diagnostics and monitoring**
+- **Security audits**
+- **Penetration testing** (Nmap/Metasploit)
+
+🔵 **HugLLM** – Latest open-source models:
+- 🌐 Runs on the Hugging Face Inference API. Performs pretty well using the latest models hosted on Novita.
+
+### 💡 **Example commands you could test**:
+1. `"Give me info on my website's SSL certificate"`
+2. `"Check if my server is using quantum-safe encryption for communication"`
+3. `"Run a comprehensive security audit on my server"`
+4. `"Create a cmd processor to .. (whatever you want)"` Note: you need to install a Quantum Network Monitor Agent to run the .NET code. This is a very flexible and powerful feature. Use with caution!
+
+### Final Word
+
+I fund the servers used to create these model files, run the Quantum Network Monitor service, and pay for inference from Novita and OpenAI, all out of my own pocket. All the code behind the model creation and the Quantum Network Monitor project is [open source](https://github.com/Mungert69).
+Feel free to use whatever you find helpful.
+
+If you appreciate the work, please consider [buying me a coffee](https://www.buymeacoffee.com/mahadeva) ☕. Your support helps cover service costs and allows me to raise token limits for everyone.
+
+I'm also open to job opportunities or sponsorship.
+
+Thank you! 😊
diff --git a/sarvam-translate-bf16.gguf b/sarvam-translate-bf16.gguf
new file mode 100644
index 0000000..e0584c9
--- /dev/null
+++ b/sarvam-translate-bf16.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a0dd1047ec70d350f21bff12cf652d181a7f24a4e161a81dc6b70e7b1384c63
+size 7767802624
diff --git a/sarvam-translate-bf16.mmproj b/sarvam-translate-bf16.mmproj
new file mode 100644
index 0000000..4bfd996
--- /dev/null
+++ b/sarvam-translate-bf16.mmproj
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:21c6336bdbf47bb5e4fcd83a7f37850cb43ef9f46a4ac379a92075ce8370a135
+size 851251936
diff --git a/sarvam-translate-bf16_q8_0.gguf b/sarvam-translate-bf16_q8_0.gguf
new file mode 100644
index 0000000..545fd09
--- /dev/null
+++ b/sarvam-translate-bf16_q8_0.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:201a879cec3f63540553f89d424dc4d17d76827b57a5ad420d190050cabff98d
+size 5845959424
diff --git a/sarvam-translate-f16.mmproj b/sarvam-translate-f16.mmproj
new file mode 100644
index 0000000..d83bc1e
--- /dev/null
+++ b/sarvam-translate-f16.mmproj
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4a160f9b04e99a45f5f61455ae7ba4ec911171ec3a2245734cecf0513dc653d
+size 851251936
diff --git a/sarvam-translate-f16_q8_0.gguf b/sarvam-translate-f16_q8_0.gguf
new file mode 100644
index 0000000..163d01f
--- /dev/null
+++ b/sarvam-translate-f16_q8_0.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ffbe90b5d2136985db6f2f4ec75decf0bda066538e8d00b591716018860d407
+size 5845959424
diff --git a/sarvam-translate-f32.mmproj b/sarvam-translate-f32.mmproj
new file mode 100644
index 0000000..fcf8d79
--- /dev/null
+++ b/sarvam-translate-f32.mmproj
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2f2e22ad8b798e265afa0fb83d580accb34a4160c1e5d2ef6a328ec73c9c8763
+size 1673392864
diff --git a/sarvam-translate-iq1_m.gguf b/sarvam-translate-iq1_m.gguf
new file mode 100644
index 0000000..4f2e900
--- /dev/null
+++ b/sarvam-translate-iq1_m.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d5d3ea22e04fdc533fabbcc558cf9d774c134d18014476f311925cf2dbc0e2ef
+size 1368816704
diff --git a/sarvam-translate-iq1_s.gguf b/sarvam-translate-iq1_s.gguf
new file mode 100644
index 0000000..f92662f
--- /dev/null
+++ b/sarvam-translate-iq1_s.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:50eace1f97bd2bf342543d5acfa8613c635f4364621deb3a8765929649cbbae3
+size 1290992704
diff --git a/sarvam-translate-iq2_m.gguf b/sarvam-translate-iq2_m.gguf
new file mode 100644
index 0000000..6bf2a14
--- /dev/null
+++ b/sarvam-translate-iq2_m.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a728006bf069cb82f6f3f6e094e043b362f1c71f407e96e0079ddbdfd551598
+size 1592908864
diff --git a/sarvam-translate-iq2_s.gguf b/sarvam-translate-iq2_s.gguf
new file mode 100644
index 0000000..ca5953e
--- /dev/null
+++ b/sarvam-translate-iq2_s.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b02dc6ee39c6431e465a960d0862ed65ee1209de4764cd1a1a6e30f2c0e3b8c
+size 1532288064
diff --git a/sarvam-translate-iq2_xs.gguf
b/sarvam-translate-iq2_xs.gguf new file mode 100644 index 0000000..4061d1b --- /dev/null +++ b/sarvam-translate-iq2_xs.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af81dc6f484b14626278b8718b89cac0b9acef034535ef95bab730a1a34789b3 +size 1494154304 diff --git a/sarvam-translate-iq2_xxs.gguf b/sarvam-translate-iq2_xxs.gguf new file mode 100644 index 0000000..dc00848 --- /dev/null +++ b/sarvam-translate-iq2_xxs.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64ad84a8c2eea43cf0fca7cd59b36a9c00eda4f17772342de4feb1c0bf17340a +size 1402403904 diff --git a/sarvam-translate-iq3_m.gguf b/sarvam-translate-iq3_m.gguf new file mode 100644 index 0000000..b65918a --- /dev/null +++ b/sarvam-translate-iq3_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:53e2926a8956400f9b6365e86f83fa60be00bcdd1554406cd137d35c75ce891b +size 1875369024 diff --git a/sarvam-translate-iq3_s.gguf b/sarvam-translate-iq3_s.gguf new file mode 100644 index 0000000..226aed2 --- /dev/null +++ b/sarvam-translate-iq3_s.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5f0e811a165c8124e7459ec2570aa48f7c01b7ff4a556e19d0ab716a9ca7e80 +size 1848212544 diff --git a/sarvam-translate-iq3_xs.gguf b/sarvam-translate-iq3_xs.gguf new file mode 100644 index 0000000..125b484 --- /dev/null +++ b/sarvam-translate-iq3_xs.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcd7eda12c05b88ba828447a8c6c5a7dc8ae649924b53babf68bb02a2bcb09bb +size 1774238784 diff --git a/sarvam-translate-iq3_xxs.gguf b/sarvam-translate-iq3_xxs.gguf new file mode 100644 index 0000000..202080b --- /dev/null +++ b/sarvam-translate-iq3_xxs.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3f27fcd33db120886ae1ba91a3e3b35ed6de46d1c13cc732950a714b29e128c +size 1700265024 diff --git a/sarvam-translate-iq4_nl.gguf b/sarvam-translate-iq4_nl.gguf new file mode 100644 index 0000000..9cfee43 --- /dev/null +++ b/sarvam-translate-iq4_nl.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b1473bba165e8ab817edc523f7b5a2c334b2c54371d168986dfbf822fd9cb40 +size 2363511104 diff --git a/sarvam-translate-iq4_xs.gguf b/sarvam-translate-iq4_xs.gguf new file mode 100644 index 0000000..1fe8b4b --- /dev/null +++ b/sarvam-translate-iq4_xs.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4be7969239c58d754fb4619c208de8897825fe6953a8deda67cffe32447875f0 +size 2263241024 diff --git a/sarvam-translate-q2_k_m.gguf b/sarvam-translate-q2_k_m.gguf new file mode 100644 index 0000000..231c797 --- /dev/null +++ b/sarvam-translate-q2_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c013ae57a9ad7317ad88f37e38ed91049d859695e12ea3df513bba475887b7c0 +size 1716384064 diff --git a/sarvam-translate-q2_k_s.gguf b/sarvam-translate-q2_k_s.gguf new file mode 100644 index 0000000..7ced667 --- /dev/null +++ b/sarvam-translate-q2_k_s.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:945e3d9ad582922718d79c73ac9be15a1d5fe4eb4bc11a28ddf5511306bb0ebe +size 1537244224 diff --git a/sarvam-translate-q3_k_m.gguf b/sarvam-translate-q3_k_m.gguf new file mode 100644 index 0000000..07a7a99 --- /dev/null +++ b/sarvam-translate-q3_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d00e55b9e0b79740ef98185e19beb8f423bf98987730f4aa00d0be49f3831d4a +size 2082730304 diff --git a/sarvam-translate-q3_k_s.gguf b/sarvam-translate-q3_k_s.gguf new 
file mode 100644 index 0000000..6865f13 --- /dev/null +++ b/sarvam-translate-q3_k_s.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e713b1ea98152db76e0fb36030cf373a2109c75796e2d99aea67194b4b550f4a +size 1875901504 diff --git a/sarvam-translate-q4_0.gguf b/sarvam-translate-q4_0.gguf new file mode 100644 index 0000000..b39f175 --- /dev/null +++ b/sarvam-translate-q4_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ed49603845a8e0cc5d578d48df027998ae1bfa2e721c724ae6edc4f4a0b35a8 +size 2190453824 diff --git a/sarvam-translate-q4_1.gguf b/sarvam-translate-q4_1.gguf new file mode 100644 index 0000000..3bb6c62 --- /dev/null +++ b/sarvam-translate-q4_1.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b37a82bc3d5976cd84045941a4891334746bc7b90dd24c06d50d4a80eeb8bc20 +size 2432947264 diff --git a/sarvam-translate-q4_k_m.gguf b/sarvam-translate-q4_k_m.gguf new file mode 100644 index 0000000..bf4e889 --- /dev/null +++ b/sarvam-translate-q4_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b61ebb739505d9884482170087949b3076e1c095b24bdab68f3d018ef1dab2d +size 2464497984 diff --git a/sarvam-translate-q4_k_s.gguf b/sarvam-translate-q4_k_s.gguf new file mode 100644 index 0000000..73fb32f --- /dev/null +++ b/sarvam-translate-q4_k_s.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe82f5ff77f83d8aa8b78c1c1e78f3690f473e3c211ce2d869fe4da79ed1afeb +size 2397589824 diff --git a/sarvam-translate-q5_0.gguf b/sarvam-translate-q5_0.gguf new file mode 100644 index 0000000..2c68d6a --- /dev/null +++ b/sarvam-translate-q5_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14c72e442b991ec166dc77e19d7ed45146e83f9bd879c0fbbb8599156cb97aad +size 2675440704 diff --git a/sarvam-translate-q5_1.gguf b/sarvam-translate-q5_1.gguf new file mode 100644 index 0000000..ba78a5f --- /dev/null +++ b/sarvam-translate-q5_1.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39a6fac6f63d2fac7946abfee1f756332c573e991c49ff391698610c77b1fd7b +size 2917934144 diff --git a/sarvam-translate-q5_k_m.gguf b/sarvam-translate-q5_k_m.gguf new file mode 100644 index 0000000..c81829f --- /dev/null +++ b/sarvam-translate-q5_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25d7cb9af6983d89129fa3c7de0c098d9e0516d8b05f4d82f0738236087f6055 +size 2835267904 diff --git a/sarvam-translate-q5_k_s.gguf b/sarvam-translate-q5_k_s.gguf new file mode 100644 index 0000000..cc74179 --- /dev/null +++ b/sarvam-translate-q5_k_s.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cba78da0f422904031fbc6dfa3ac4d6b0c8eb0a2e117e5f2fe6a4c7d26d66c4 +size 2800800064 diff --git a/sarvam-translate-q6_k_m.gguf b/sarvam-translate-q6_k_m.gguf new file mode 100644 index 0000000..1f3400e --- /dev/null +++ b/sarvam-translate-q6_k_m.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46266d04e37d69f1df3d4b6708d58d8ec54ef8c8c1d363ce614a8c3ab7a632fa +size 3190739264 diff --git a/sarvam-translate-q8_0.gguf b/sarvam-translate-q8_0.gguf new file mode 100644 index 0000000..62bc785 --- /dev/null +++ b/sarvam-translate-q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f53e8684210b3ba170fdee2bb01d286ddecede28f033293fddd3e5cdff316964 +size 4130401024 diff --git a/sarvam-translate-q8_0.mmproj b/sarvam-translate-q8_0.mmproj new file mode 100644 index 0000000..b6a739c --- /dev/null +++ 
b/sarvam-translate-q8_0.mmproj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92d18f715001de72083431366accd65f55b753e765fe3738f8b1806cdfbaa2e7 +size 591378016 diff --git a/sarvam-translate-tq1_0.gguf b/sarvam-translate-tq1_0.gguf new file mode 100644 index 0000000..1e67460 --- /dev/null +++ b/sarvam-translate-tq1_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:040a74815c596ec0d25f4356ef7a99e5387cd36f24111304c7b61a121ba8216a +size 1235472704 diff --git a/sarvam-translate-tq2_0.gguf b/sarvam-translate-tq2_0.gguf new file mode 100644 index 0000000..915b813 --- /dev/null +++ b/sarvam-translate-tq2_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0300e9d3ce60709c3736d3b2b25a5ea706e65ab1574e3aba73b33ea48c4cdbe +size 1385877824 diff --git a/sarvam-translate.imatrix b/sarvam-translate.imatrix new file mode 100644 index 0000000..c605015 --- /dev/null +++ b/sarvam-translate.imatrix @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22e2492ca95820bf901f0fef40d9d15f3702d75b60f81001fec93661c1db3439 +size 3419902