commit df09387ebf3774981576b5260169318ea692ebfb
Author: ModelHub XC
Date:   Tue Apr 21 18:24:45 2026 +0800

    Initialize the project; model provided by the ModelHub XC community

    Model: fs90/Llama-3.2-1B-Instruct-bnb-4bit-lima-GGUF
    Source: Original Platform

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..fc28246
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,43 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+model.F16.gguf filter=lfs diff=lfs merge=lfs -text
+model.Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+model.Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+model.Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
+Llama-3.2-1B-Instruct-bnb-4bit-lima-F16.gguf filter=lfs diff=lfs merge=lfs -text
+Llama-3.2-1B-Instruct-bnb-4bit-lima-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Llama-3.2-1B-Instruct-bnb-4bit-lima-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
+Llama-3.2-1B-Instruct-bnb-4bit-lima-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
diff --git a/Llama-3.2-1B-Instruct-bnb-4bit-lima-F16.gguf b/Llama-3.2-1B-Instruct-bnb-4bit-lima-F16.gguf
new file mode 100644
index 0000000..3b98cfe
--- /dev/null
+++ b/Llama-3.2-1B-Instruct-bnb-4bit-lima-F16.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d6af89535ffb94e98a0a50020b3086fb5be0bd079c2b94d3e5cfdbaeb6940f8
+size 2479595584
diff --git a/Llama-3.2-1B-Instruct-bnb-4bit-lima-Q4_K_M.gguf b/Llama-3.2-1B-Instruct-bnb-4bit-lima-Q4_K_M.gguf
new file mode 100644
index 0000000..5018fcb
--- /dev/null
+++ b/Llama-3.2-1B-Instruct-bnb-4bit-lima-Q4_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27e52fd910a09abc3ed3e8c9e923f24a795492fc85ce5790330d78413eaec4d2
+size 807694400
diff --git a/Llama-3.2-1B-Instruct-bnb-4bit-lima-Q6_K.gguf b/Llama-3.2-1B-Instruct-bnb-4bit-lima-Q6_K.gguf
new file mode 100644
index 0000000..ede4e8e
--- /dev/null
+++ b/Llama-3.2-1B-Instruct-bnb-4bit-lima-Q6_K.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:adefada17fddd0e20c886e2a63240b4d5c8b6f10f6a70424b02876ee7e91ec6d
+size 1021800512
diff --git a/Llama-3.2-1B-Instruct-bnb-4bit-lima-Q8_0.gguf b/Llama-3.2-1B-Instruct-bnb-4bit-lima-Q8_0.gguf
new file mode 100644
index 0000000..ee72ccd
--- /dev/null
+++ b/Llama-3.2-1B-Instruct-bnb-4bit-lima-Q8_0.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b241a0d629863eed1d4a0505edfc096d1a9bef5e960af5f910d98cafd50ca18d
+size 1321082944
diff --git a/Modelfile b/Modelfile
new file mode 100644
index 0000000..fb9afb0
--- /dev/null
+++ b/Modelfile
@@ -0,0 +1,68 @@
+# Modelfile for Ollama (GGUF)
+# Auto-generated using Unsloth's template mapper
+# This uses the Llama-3.2-1B-Instruct-bnb-4bit-lima-Q4_K_M.gguf quantization
+#
+# Note: You can change the FROM line to use a different quantization
+# Available quantizations in this directory:
+# - Llama-3.2-1B-Instruct-bnb-4bit-lima-F16.gguf
+# - Llama-3.2-1B-Instruct-bnb-4bit-lima-Q8_0.gguf
+# - Llama-3.2-1B-Instruct-bnb-4bit-lima-Q6_K.gguf
+# - Llama-3.2-1B-Instruct-bnb-4bit-lima-Q4_K_M.gguf
+
+
+FROM ./Llama-3.2-1B-Instruct-bnb-4bit-lima-Q4_K_M.gguf
+TEMPLATE """{{ if .Messages }}
+{{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
+{{- if .System }}
+
+{{ .System }}
+{{- end }}
+{{- if .Tools }}
+
+You are a helpful assistant with tool calling capabilities. When you receive a tool call response, use the output to format an answer to the original user question.
+{{- end }}
+{{- end }}<|eot_id|>
+{{- range $i, $_ := .Messages }}
+{{- $last := eq (len (slice $.Messages $i)) 1 }}
+{{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>
+{{- if and $.Tools $last }}
+
+Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
+
+Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.
+
+{{ $.Tools }}
+{{- end }}
+
+{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ end }}
+{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
+{{- if .ToolCalls }}
+
+{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
+{{- else }}
+
+{{ .Content }}{{ if not $last }}<|eot_id|>{{ end }}
+{{- end }}
+{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>
+
+{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ end }}
+{{- end }}
+{{- end }}
+{{- else }}
+{{- if .System }}<|start_header_id|>system<|end_header_id|>
+
+{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
+
+{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ end }}{{ .Response }}{{ if .Response }}<|eot_id|>{{ end }}"""
+PARAMETER stop "<|start_header_id|>"
+PARAMETER stop "<|end_header_id|>"
+PARAMETER stop "<|eot_id|>"
+PARAMETER stop "<|eom_id|>"
+PARAMETER temperature 1.5
+PARAMETER min_p 0.1
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..222f43f
--- /dev/null
+++ b/README.md
@@ -0,0 +1,127 @@
+---
+base_model: unsloth/Llama-3.2-1B-Instruct-bnb-4bit
+library_name: transformers
+pipeline_tag: text-generation
+tags:
+- gguf
+- fine-tuned
+- lima
+language:
+- en
+license: apache-2.0
+---
+
+# Llama-3.2-1B-Instruct-bnb-4bit-lima - GGUF Format
+
+GGUF-format quantizations of Llama-3.2-1B-Instruct-bnb-4bit-lima for use with llama.cpp and Ollama.
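+
+## Quick Start (Ollama)
+
+A minimal sketch of one way to try the model locally, assuming Ollama and the `huggingface-cli` tool are already installed; the tag `llama3.2-1b-lima` is an arbitrary local name, not something defined by this repository.
+
+```bash
+# Fetch the recommended Q4_K_M quantization and the Modelfile into the current directory
+huggingface-cli download fs90/Llama-3.2-1B-Instruct-bnb-4bit-lima-GGUF \
+  Llama-3.2-1B-Instruct-bnb-4bit-lima-Q4_K_M.gguf Modelfile --local-dir .
+
+# Register the GGUF with Ollama under a local tag, then start an interactive chat
+ollama create llama3.2-1b-lima -f ./Modelfile
+ollama run llama3.2-1b-lima
+```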
+
+## Model Details
+
+- **Base Model**: [unsloth/Llama-3.2-1B-Instruct-bnb-4bit](https://huggingface.co/unsloth/Llama-3.2-1B-Instruct-bnb-4bit)
+- **Format**: gguf
+- **Dataset**: [GAIR/lima](https://huggingface.co/datasets/GAIR/lima)
+- **Size**: 0.75 GB - 2.31 GB
+- **Usage**: llama.cpp / Ollama
+
+## Related Models
+
+- **LoRA Adapters**: [fs90/Llama-3.2-1B-Instruct-bnb-4bit-lima-lora](https://huggingface.co/fs90/Llama-3.2-1B-Instruct-bnb-4bit-lima-lora) - Smaller LoRA-only adapters
+- **Merged FP16 Model**: [fs90/Llama-3.2-1B-Instruct-bnb-4bit-lima](https://huggingface.co/fs90/Llama-3.2-1B-Instruct-bnb-4bit-lima) - Original unquantized model in FP16
+
+
+## Prompt Format
+
+This model uses the **Llama 3.2** chat template.
+
+### Ollama Template Format
+
+```
+{{ if .Messages }}
+{{- if or .System .Tools }}<|start_header_id|>system<|end_header_id|>
+{{- if .System }}
+
+{{ .System }}
+{{- end }}
+{{- if .Tools }}
+
+You are a helpful assistant with tool calling capabilities. When you receive a tool call response, use the output to format an answer to the original user question.
+{{- end }}
+{{- end }}<|eot_id|>
+{{- range $i, $_ := .Messages }}
+{{- $last := eq (len (slice $.Messages $i)) 1 }}
+{{- if eq .Role "user" }}<|start_header_id|>user<|end_header_id|>
+{{- if and $.Tools $last }}
+
+Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.
+
+Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}. Do not use variables.
+
+{{ $.Tools }}
+{{- end }}
+
+{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ end }}
+{{- else if eq .Role "assistant" }}<|start_header_id|>assistant<|end_header_id|>
+{{- if .ToolCalls }}
+
+{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "parameters": {{ .Function.Arguments }}}{{ end }}
+{{- else }}
+
+{{ .Content }}{{ if not $last }}<|eot_id|>{{ end }}
+{{- end }}
+{{- else if eq .Role "tool" }}<|start_header_id|>ipython<|end_header_id|>
+
+{{ .Content }}<|eot_id|>{{ if $last }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ end }}
+{{- end }}
+{{- end }}
+{{- else }}
+{{- if .System }}<|start_header_id|>system<|end_header_id|>
+
+{{ .System }}<|eot_id|>{{ end }}{{ if .Prompt }}<|start_header_id|>user<|end_header_id|>
+
+{{ .Prompt }}<|eot_id|>{{ end }}<|start_header_id|>assistant<|end_header_id|>
+
+{{ end }}{{ .Response }}{{ if .Response }}<|eot_id|>{{ end }}
+```
+
+
+## Training Details
+
+- **LoRA Rank**: 64
+- **Training Steps**: 480
+- **Training Loss**: 1.1123
+- **Max Seq Length**: 2048
+- **Training Scope**: 1,278 samples (3 epochs, full dataset)
+
+For complete training configuration, see the LoRA adapters repository linked above.
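+
+## Usage with llama.cpp
+
+The quantizations can also be run directly with llama.cpp. The command below is a sketch against a recent llama.cpp build (older releases ship the binary as `main` rather than `llama-cli`); the sampling flags simply mirror the `temperature` and `min_p` values from the bundled Modelfile.
+
+```bash
+# Interactive chat with the Q4_K_M quantization using llama.cpp's CLI
+./llama-cli -m ./Llama-3.2-1B-Instruct-bnb-4bit-lima-Q4_K_M.gguf \
+  -c 2048 --temp 1.5 --min-p 0.1 -cnv
+```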
+
+## Available Quantizations
+
+| Quantization | File | Size | Quality |
+|--------------|------|------|---------|
+| **F16** | [Llama-3.2-1B-Instruct-bnb-4bit-lima-F16.gguf](Llama-3.2-1B-Instruct-bnb-4bit-lima-F16.gguf) | 2.31 GB | Full precision (largest) |
+| **Q4_K_M** | [Llama-3.2-1B-Instruct-bnb-4bit-lima-Q4_K_M.gguf](Llama-3.2-1B-Instruct-bnb-4bit-lima-Q4_K_M.gguf) | 0.75 GB | Good balance (recommended) |
+| **Q6_K** | [Llama-3.2-1B-Instruct-bnb-4bit-lima-Q6_K.gguf](Llama-3.2-1B-Instruct-bnb-4bit-lima-Q6_K.gguf) | 0.95 GB | High quality |
+| **Q8_0** | [Llama-3.2-1B-Instruct-bnb-4bit-lima-Q8_0.gguf](Llama-3.2-1B-Instruct-bnb-4bit-lima-Q8_0.gguf) | 1.23 GB | Very high quality, near original |
+
+**Usage:** Use the dropdown menu above to select a quantization, then follow Hugging Face's provided instructions.
+
+## License
+
+Based on unsloth/Llama-3.2-1B-Instruct-bnb-4bit and trained on GAIR/lima.
+Please refer to the original model and dataset licenses.
+
+## Credits
+
+**Trained by:** Farhan Syah
+
+**Training pipeline:**
+- [unsloth-finetuning](https://github.com/farhan-syah/unsloth-finetuning) by [@farhan-syah](https://github.com/farhan-syah)
+- [Unsloth](https://github.com/unslothai/unsloth) - 2x faster LLM fine-tuning
+
+**Base components:**
+- Base model: [unsloth/Llama-3.2-1B-Instruct-bnb-4bit](https://huggingface.co/unsloth/Llama-3.2-1B-Instruct-bnb-4bit)
+- Training dataset: [GAIR/lima](https://huggingface.co/datasets/GAIR/lima) by GAIR