commit ab296485d79e4a939677e69b7159ffbb8f7a19c4 Author: ModelHub XC Date: Sun Jun 21 08:21:13 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: QuantFactory/OneLLM-Doey-V1-Llama-3.2-3B-GGUF Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9ebfc7f --- /dev/null +++ b/.gitattributes @@ -0,0 +1,49 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text 
+OneLLM-Doey-V1-Llama-3.2-3B.Q4_1.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q4_0.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q5_0.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q5_1.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q2_K.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text +OneLLM-Doey-V1-Llama-3.2-3B.Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q2_K.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q2_K.gguf new file mode 100644 index 0000000..6823b93 --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0c5e916a3cf2b3d858ce24200ebdefd3faab11499c7fca5e754a198300e92a7 +size 1493217888 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_L.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_L.gguf new file mode 100644 index 0000000..4010e26 --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_L.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4478ec9967759f0a96330f0c9f7af263f411902ed53860c0999360576f3dd7b +size 1984645728 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_M.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_M.gguf new file mode 100644 index 0000000..40f9b95 --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:dcab9808572d2b9229f8d60528d2d9f3153a976a1fafef45604408df297143f8 +size 1856457312 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_S.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_S.gguf new file mode 100644 index 0000000..5d92815 --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q3_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:244d8a6f5e46cf45928032fc939729bcd628c227c1cc20a8609a721dd9ef8915 +size 1712147040 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q4_0.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q4_0.gguf new file mode 100644 index 0000000..bbcc4fd --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q4_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a34aafea679fc234b1f26ac1ec3b486c40dec41ede339b04d96edf3ef74ea67 +size 2138817120 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q4_1.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q4_1.gguf new file mode 100644 index 0000000..abb126b --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q4_1.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1036eac1d95e2fc759bdb5670053f4b2158dbfdc946b2149da339dd23237a52 +size 2339603040 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q4_K_M.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q4_K_M.gguf new file mode 100644 index 0000000..3a1b096 --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57e93584bfb708a9841edffd70635c21f27955d8a1b4e346a72edc8163394a97 +size 2241004128 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q4_K_S.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q4_K_S.gguf new file mode 100644 index 0000000..651ca13 --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q4_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63537b35a08ebec9120909faccdeb0e4aff66facf9559d2817044463e7a19239 +size 2149827168 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q5_0.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q5_0.gguf new file mode 100644 index 0000000..3cc0a5c --- /dev/null +++ 
b/OneLLM-Doey-V1-Llama-3.2-3B.Q5_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8853403650eb0b2b95f3b6ea495b79a9e2c65418afcd779e64fdd0ac4ede32e0 +size 2540388960 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q5_1.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q5_1.gguf new file mode 100644 index 0000000..dad6c34 --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q5_1.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab9e25fae81ad0edd83012a0fc54865ccd8bb3494fa0efd94d9806446d17ed36 +size 2741174880 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q5_K_M.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q5_K_M.gguf new file mode 100644 index 0000000..be20385 --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:262338510baa6869279ca64a78b8d90c876524df2a61f342b4ddd8ee1a8a3b63 +size 2593030752 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q5_K_S.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q5_K_S.gguf new file mode 100644 index 0000000..599471d --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q5_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98e50d8bbaa410b8914ea5890b0058e8db4c37834b32592fb95ff4fbde4b8f82 +size 2540388960 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q6_K.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q6_K.gguf new file mode 100644 index 0000000..5609643 --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d89b6f1183fa2896884f90854e067ca538d13aa4e6c2166e5fece6ba8a1de2d7 +size 2967059040 diff --git a/OneLLM-Doey-V1-Llama-3.2-3B.Q8_0.gguf b/OneLLM-Doey-V1-Llama-3.2-3B.Q8_0.gguf new file mode 100644 index 0000000..f42f8b1 --- /dev/null +++ b/OneLLM-Doey-V1-Llama-3.2-3B.Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d5d708f7b21322021896ade7d8067397c3b3fe709a3546a9fccb95c881a9ed83 +size 3840526944 diff --git a/README.md b/README.md new file mode 100644 
index 0000000..9a57f64 --- /dev/null +++ b/README.md @@ -0,0 +1,132 @@ + +--- + +license: apache-2.0 +datasets: +- nvidia/ChatQA-Training-Data +language: +- en +base_model: +- meta-llama/Llama-3.2-3B +pipeline_tag: text-generation +library_name: transformers + +--- + +[![QuantFactory Banner](https://lh7-rt.googleusercontent.com/docsz/AD_4nXeiuCm7c8lEwEJuRey9kiVZsRn2W-b4pWlu3-X534V3YmVuVc2ZL-NXg2RkzSOOS2JXGHutDuyyNAUtdJI65jGTo8jT9Y99tMi4H4MqL44Uc5QKG77B0d6-JfIkZHFaUA71-RtjyYZWVIhqsNZcx8-OMaA?key=xt3VSDoCbmTY7o-cwwOFwQ)](https://hf.co/QuantFactory) + + +# QuantFactory/OneLLM-Doey-V1-Llama-3.2-3B-GGUF +This is a quantized version of [DoeyLLM/OneLLM-Doey-V1-Llama-3.2-3B](https://huggingface.co/DoeyLLM/OneLLM-Doey-V1-Llama-3.2-3B) created using llama.cpp + +# Original Model Card + +## **Model Summary** +This model is a fine-tuned version of **LLaMA 3.2-3B**, optimized using **LoRA (Low-Rank Adaptation)** on the [NVIDIA ChatQA-Training-Data](https://huggingface.co/datasets/nvidia/ChatQA-Training-Data). It is tailored for conversational AI, question answering, and other instruction-following tasks, with support for sequences up to 1024 tokens. + +--- + +## **Key Features** +- **Base Model**: LLaMA 3.2-3B +- **Fine-Tuning Framework**: LoRA +- **Dataset**: NVIDIA ChatQA-Training-Data +- **Max Sequence Length**: 1024 tokens +- **Use Case**: Instruction-based tasks, question answering, conversational AI. + +## **Model Usage** +This fine-tuned model is suitable for: +- **Conversational AI**: Chatbots and dialogue agents with improved contextual understanding. +- **Question Answering**: Generating concise and accurate answers to user queries. +- **Instruction Following**: Responding to structured prompts. +- **Long-Context Tasks**: Processing sequences up to 1024 tokens for long-text reasoning. + +# **How to Use DoeyLLM / OneLLM-Doey-V1-Llama-3.2-3B-Instruct** + +This guide explains how to use the **DoeyLLM** model on both app (iOS) and PC platforms. 
+ +--- + +## **App (iOS): Use with OneLLM** + +OneLLM brings versatile large language models (LLMs) to your device—Llama, Gemma, Qwen, Mistral, and more. Enjoy private, offline GPT and AI tools tailored to your needs. + +With OneLLM, experience the capabilities of leading-edge language models directly on your device, all without an internet connection. Get fast, reliable, and intelligent responses, while keeping your data secure with local processing. + +### **Quick Start for iOS** + +Follow these steps to integrate the **DoeyLLM** model using the OneLLM app: + +1. **Download OneLLM** + Get the app from the [App Store](https://apps.apple.com/us/app/onellm-private-ai-gpt-llm/id6737907910) and install it on your iOS device. + +2. **Load the DoeyLLM Model** + Use the OneLLM interface to load the DoeyLLM model directly into the app: + - Navigate to the **Model Library**. + - Search for `DoeyLLM`. + - Select the model and tap **Download** to store it locally on your device. +3. **Start Conversing** + Once the model is loaded, you can begin interacting with it through the app's chat interface. For example: + - Tap the **Chat** tab. + - Type your question or prompt, such as: + > "Explain the significance of AI in education." + - Receive real-time, intelligent responses generated locally. + +### **Key Features of OneLLM** +- **Versatile Models**: Supports various LLMs, including Llama, Gemma, and Qwen. +- **Private & Secure**: All processing occurs locally on your device, ensuring data privacy. +- **Offline Capability**: Use the app without requiring an internet connection. +- **Fast Performance**: Optimized for mobile devices, delivering low-latency responses. + +For more details or support, visit the [OneLLM App Store page](https://apps.apple.com/us/app/onellm-private-ai-gpt-llm/id6737907910). 
+ +## **PC: Use with Transformers** + +The DoeyLLM model can also be used on PC platforms through the `transformers` library, enabling robust and scalable inference for various NLP tasks. + +### **Quick Start for PC** +Follow these steps to use the model with Transformers: + +1. **Install Transformers** + Ensure you have `transformers >= 4.43.0` installed. Update or install it via pip: + + ```bash + pip install --upgrade transformers + ``` +2. **Load the Model** + Use the transformers library to load the model and tokenizer: + +Starting with `transformers` 4.43.0, you can run conversational inference using the Transformers `pipeline` abstraction or by leveraging the Auto classes with the `generate()` function. + +Make sure to update your transformers installation via `pip install --upgrade transformers`. + +```python +import torch +from transformers import pipeline + +model_id = "OneLLM-Doey-V1-Llama-3.2-3B" +pipe = pipeline( + "text-generation", + model=model_id, + torch_dtype=torch.bfloat16, + device_map="auto", +) +messages = [ + {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"}, + {"role": "user", "content": "Who are you?"}, +] +outputs = pipe( + messages, + max_new_tokens=256, +) +print(outputs[0]["generated_text"][-1]) +``` + + + +## Responsibility & Safety + +As part of our responsible release strategy, we adopted a three-pronged approach to managing trust and safety risks: + +1. Enable developers to deploy helpful, safe, and flexible experiences for their target audience and the use cases supported by the model. +2. Protect developers from adversarial users attempting to exploit the model’s capabilities to potentially cause harm. +3. Provide safeguards for the community to help prevent the misuse of the model. 
diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file