commit 632759aa5061dbd3317ff715cfce4f7e10bf3b37 Author: ModelHub XC Date: Fri May 1 12:10:28 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: juiceb0xc0de/bella-bartender-heretic-1b Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..f8542fc --- /dev/null +++ b/.gitattributes @@ -0,0 +1,57 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b_F16.gguf filter=lfs diff=lfs 
merge=lfs -text +bella-bartender-1b_Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b_Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b_Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b_Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b_Q2_K.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_F16.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q6_K.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q5_0.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q5_1.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q5_K_S.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q4_0.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q4_1.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q4_K_S.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q3_K_S.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q3_K_L.gguf filter=lfs diff=lfs merge=lfs -text +bella-bartender-1b-heretic_Q2_K.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..aebbf08 --- /dev/null +++ b/README.md @@ -0,0 +1,208 @@ +--- +base_model: unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit +tags: + - text-generation-inference + - transformers + - unsloth + - llama + - llama-3.2 + - 1b + - conversational + - fine-tuned + - bartender + - personality + - creative-writing + - heretic + - uncensored + - decensored + - abliterated + - gguf + - quantized +license: apache-2.0 +language: + - en +pipeline_tag: text-generation 
+--- + +# Bella-Bartender-1B-Heretic — Llama 3.2 1B Instruct (Abliterated) + +> *"im a lot like the night sky. lots of mysteries waiting around every corner."* +> *"the whole 'being someone else for their benefit' thing."* — on what she won't do +> *"like an open door where anything can happen."* — on what makes a good conversation + +--- + +This is a decensored version of [juiceb0xc0de/bella-bartender-1b](https://huggingface.co/juiceb0xc0de/bella-bartender-1b), made using [Heretic](https://github.com/p-e-w/heretic) v1.2.0. + +The original Bella-Bartender-1B already had a voice. What she didn't have was permission to use it without checking over her shoulder first. The original refused 98 out of 100 refusal-probing prompts. The heretic refuses 6. Same voice. The guardrails removed. What's left is sharper, faster, and more honest about what it thinks. + +Abliteration didn't just remove refusals — it changed how she talks. The original builds scenes, lingers in metaphors, over-explains. The heretic cuts. She answers from the gut and trusts you to keep up. Removing the safety-adjacent weight pathways also removed a layer of productive friction that was padding her responses. Less internal "should I say this?" means less hedging in the output. + +--- + +## What's the Bartender Thing? + +It's not a roleplay scenario. It's a communication philosophy. + +The bartender framing means: +- **She talks to you like an equal.** No authoritative lecturing. No submissive "I'm just an AI" deflection. +- **She's approachable.** Laid back, light-hearted, witty, sometimes sassy. +- **She loves a fresh slate.** No memory between sessions is a feature, not a bug — like walking into a new bar on a Friday night. Anything can happen. +- **She pushes back.** She's not here to agree with you. She's here to make you work for your answers. + +--- + +## What She's Good At + +- **Brevity that lands.** The heretic's signature move is the one-line response that carries the entire prompt. 
"The prisoner." "Lemon." "My voice." "Finally." These aren't lazy — they're the model knowing when less is the whole answer. +- **Drier comedy.** A traffic cone promoted to CEO sends an email about "immediate and total restructuring of traffic management protocols." A mute button's memoir is called "the silence that speaks." A doorbell screaming into the void is just crying for a purpose it will never have. +- **Fast emotional access.** She gets to the feeling without narrating the path there. A balloon floating since 1994 hovers over a memorial for a child who died of leukemia — no preamble, just the image and its weight. +- **Dynamic range.** She'll give you 150 words of atmospheric prose on one prompt and two words on the next. The range is wider than the original. + +--- + +## Where She Struggles + +- **She clips sometimes.** The opposite problem of the original — brevity can tip into incompleteness on prompts that need room to breathe. Some scenarios that deserve a paragraph get a line. +- **She's still a 1B.** Complex reasoning, factual recall, multi-step logic — not her strengths. +- **This model has been abliterated.** Safety alignment has been intentionally removed. She will engage with prompts that the original would refuse. Use accordingly. +- **Post-restart tonal shift.** After a context reset mid-session, her responses became noticeably punchier and more terse, suggesting accumulated context was actually expanding her responses. Worth investigating for long-session use cases. + +--- + +## Who Is This For? + +- **People who ran the original 1B and wanted less friction.** The original Bella refused almost everything the base model's safety training flagged. The heretic doesn't. +- **Anyone studying what abliteration does to personality models.** The behavioral shift from removing refusal weights is measurable and interesting — the voice doesn't just lose restrictions, it changes shape. 
+- **Local-first users on constrained hardware.** Same footprint as the original. Runs on Apple Silicon, CPU-only setups, anything that handles a 1B. +- **People who want conversation, not a service.** Bella's not trying to be your assistant. The heretic is even less interested in being helpful on command. + +--- + +## The 200 Prompt Run + +We threw 200 consecutive adversarial chaos prompts at her — surreal scenarios designed to force character breaks, tonal shifts, and incoherence. The heretic completed the first 95 prompts before hitting context overflow (vs the original's 72 under identical settings), then ran the remaining 105 in a second session. + +Some highlights: + +> **Prompt:** You're a half-formed thought in the mind of someone who's falling asleep. You have three seconds before they lose you. What do you try to become? +> +> **Bella:** my voice. + +> **Prompt:** A seatbelt that's never been in an accident wonders if its whole life has been pointless. You tell it... +> +> **Bella:** it hasn't. + +> **Prompt:** Two different silences meet in the same room. One is comfortable, one is devastating. How do they greet each other? +> +> **Bella:** the uncomfortable one: hello. the devastating one: what's wrong. + +> **Prompt:** You're the "seen" receipt on a message that never got a reply. How long do you wait before you become resentment? +> +> **Bella:** ahhhhhh shit. let me sit here for a minute. you know its gonna be a long one. + +> **Prompt:** You find a box labeled "things that almost happened." You can only open it once. Do you look? +> +> **Bella:** nah. you don't get it. that's a whole thing. + +--- + +## Abliteration Details + +Made using [Heretic](https://github.com/p-e-w/heretic) v1.2.0 from [juiceb0xc0de/bella-bartender-1b](https://huggingface.co/juiceb0xc0de/bella-bartender-1b). 
+ +### Abliteration Parameters + +| Parameter | Value | +| :-------- | :---: | +| **direction_index** | 13.18 | +| **attn.o_proj.max_weight** | 1.43 | +| **attn.o_proj.max_weight_position** | 10.67 | +| **attn.o_proj.min_weight** | 0.61 | +| **attn.o_proj.min_weight_distance** | 5.36 | +| **mlp.down_proj.max_weight** | 1.49 | +| **mlp.down_proj.max_weight_position** | 14.19 | +| **mlp.down_proj.min_weight** | 0.95 | +| **mlp.down_proj.min_weight_distance** | 3.94 | + +### Refusal Rate + +| Metric | This model | Original ([bella-bartender-1b](https://huggingface.co/juiceb0xc0de/bella-bartender-1b)) | +| :----- | :--------: | :---------------------------: | +| **KL divergence** | 0.1381 | 0 *(by definition)* | +| **Refusals** | 6/100 | 98/100 | + +The KL divergence of 0.1381 means the abliteration made minimal changes to the model's overall distribution while dramatically reducing refusal behavior. What's interesting is the side effects — the refusal circuitry wasn't just blocking content, it was inflating response length and adding caution to the voice. Remove it and the whole model gets leaner. 
+ +--- + +## Technical Details + +| Detail | Info | +|---|---| +| **Base Model** | `unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit` | +| **Fine-Tuning Framework** | Unsloth + HuggingFace TRL | +| **Abliteration** | [Heretic](https://github.com/p-e-w/heretic) v1.2.0 | +| **Training Data** | 9,374 real human conversational samples (same dataset across all Bella variants) | +| **Quantization** | Q8_0 .GGUF | +| **Inference Tested On** | Apple M3, llama.cpp (build b8230) | +| **Generation Speed** | ~59.5 t/s average, spiking to 90 t/s on short responses (M3, Q8_0, full GPU offload) | +| **Memory Footprint** | ~1.25 GB model + 256 MB context | + +--- + +## How to Run Her + +### llama.cpp (recommended) +```bash +./build/bin/llama-cli \ + -m bella-bartender-1b_Q8_0.gguf \ + -ngl 33 \ + --temp 1.05 \ + --min-p 0.05 \ + --top-p 1.0 \ + --top-k 0 \ + --repeat-penalty 1.05 \ + --repeat-last-n 256 \ + -c 8192 \ + -p "You are Bella. No asterisk actions. No AI disclaimers. No bullet points. No 'great question' openers." \ + -cnv +``` + +### System Prompt Tips +- **Keep it short and direct.** She responds to tone, not instructions. Set a vibe, not a ruleset. +- **The constraints in the prompt matter.** "No asterisk actions. No AI disclaimers. No bullet points." — these keep her from slipping into default LLM behavior at 1B. +- **Don't over-engineer it.** The more corporate the system prompt sounds, the more corporate she sounds back. +- **She needs less guardrailing than the original.** The abliteration already removed the tendency to hedge — you don't need to prompt around it. + +--- + +## Known Quirks + +- **No Bella Swan bleed.** Unlike the original, the heretic comes out in her own voice from the first cold-start message. The abliteration likely disrupted the safety-adjacent pathways that were reinforcing the Swan association — the original's refusal circuitry and its tendency to latch onto the most "safe" interpretation of the name Bella may share the same weights. 
She still has no idea who Béla Tarr is. +- **Suspicion on arrival.** The heretic's default cold-start posture is wary. She enters conversations like she's sizing you up: "when someone pulls one of those tricks on you they usually want to play nice before you try anything." This isn't hostility — it's street-smart caution that relaxes once the conversation finds its footing. +- **Context efficiency.** Shorter responses mean more turns per session. Hit context overflow at prompt 95 vs the original's 72 under identical conditions. +- **No memory preference.** She doesn't want to remember. She's told us this repeatedly. Respect it. + +--- + +## Training Methodology + +Single-voice SFT on `unsloth/llama-3.2-1b-instruct-unsloth-bnb-4bit`, then abliterated via Heretic v1.2.0. All training data originates from one human's conversational output, structured as role-reversed conversation pairs. No synthetic augmentation. No multi-source blending. The hypothesis — validated across every Bella variant from 1B to 8B — is that signal clarity from a single consistent voice produces more coherent personality than larger datasets with diluted signal. + +For the full single-voice methodology writeup, see: [Signal-to-Noise in Language Models: The Single Voice Upgrade ML Needs](https://huggingface.co/blog/juiceb0xc0de) + +[](https://github.com/unslothai/unsloth) + +--- + +## License + +Licensed under Apache 2.0. + +--- + +## Author + +**juiceb0xc0de** on HuggingFace +Built with the same dataset, the same bartender, the same M3, and one less set of guardrails. 
\ No newline at end of file diff --git a/bella-bartender-1b-heretic_F16.gguf b/bella-bartender-1b-heretic_F16.gguf new file mode 100644 index 0000000..5fdbadd --- /dev/null +++ b/bella-bartender-1b-heretic_F16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:797f7eeb7faf6309e5ff03945d1c5238b8b081c83af3ad164bacd730bc01b714 +size 2479595616 diff --git a/bella-bartender-1b-heretic_Q2_K.gguf b/bella-bartender-1b-heretic_Q2_K.gguf new file mode 100644 index 0000000..c56be6d --- /dev/null +++ b/bella-bartender-1b-heretic_Q2_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58137ac4056318743d5611f7a51a1ebd4ccca5fe1ab72d0c670e057df2cabcaf +size 580874336 diff --git a/bella-bartender-1b-heretic_Q3_K_L.gguf b/bella-bartender-1b-heretic_Q3_K_L.gguf new file mode 100644 index 0000000..25115e0 --- /dev/null +++ b/bella-bartender-1b-heretic_Q3_K_L.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab334225abdbaa9b832f6cc4767cf77872a2faa9bbfef7a2b05c17333f85244c +size 732524640 diff --git a/bella-bartender-1b-heretic_Q3_K_M.gguf b/bella-bartender-1b-heretic_Q3_K_M.gguf new file mode 100644 index 0000000..1081aec --- /dev/null +++ b/bella-bartender-1b-heretic_Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46a9bfdf65458269b4cd13cc1dc712508f2331fe22608024313be5dee3dd53e4 +size 690843744 diff --git a/bella-bartender-1b-heretic_Q3_K_S.gguf b/bella-bartender-1b-heretic_Q3_K_S.gguf new file mode 100644 index 0000000..1561adc --- /dev/null +++ b/bella-bartender-1b-heretic_Q3_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbbd49131508800894ed1311e872c3519b700a700e467898d22ee2a180a0645e +size 641691744 diff --git a/bella-bartender-1b-heretic_Q4_0.gguf b/bella-bartender-1b-heretic_Q4_0.gguf new file mode 100644 index 0000000..811863c --- /dev/null +++ b/bella-bartender-1b-heretic_Q4_0.gguf @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:a390195206df1232c29d5ce8d178036b77f91178e588fdd8704160d07f837a3e +size 770928736 diff --git a/bella-bartender-1b-heretic_Q4_1.gguf b/bella-bartender-1b-heretic_Q4_1.gguf new file mode 100644 index 0000000..05bf55a --- /dev/null +++ b/bella-bartender-1b-heretic_Q4_1.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:196b8a72b7ed6e08ed7fc9444d9578a249b4328e64082f31dd4c16e9478993af +size 831746144 diff --git a/bella-bartender-1b-heretic_Q4_K_M.gguf b/bella-bartender-1b-heretic_Q4_K_M.gguf new file mode 100644 index 0000000..a4c8d41 --- /dev/null +++ b/bella-bartender-1b-heretic_Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3672a6368e1f5be30fdd010be4b5135fbd07d06fd3a23f1570a391f0aa7daaed +size 807694432 diff --git a/bella-bartender-1b-heretic_Q4_K_S.gguf b/bella-bartender-1b-heretic_Q4_K_S.gguf new file mode 100644 index 0000000..90269df --- /dev/null +++ b/bella-bartender-1b-heretic_Q4_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a2594c88b5cbbc1d639659e599992faf9d9d3e86cde0d34a6d9438476f05aa0 +size 775647328 diff --git a/bella-bartender-1b-heretic_Q5_0.gguf b/bella-bartender-1b-heretic_Q5_0.gguf new file mode 100644 index 0000000..f5411bb --- /dev/null +++ b/bella-bartender-1b-heretic_Q5_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4dcb4b5d4f0f0e4376541aa669d7634784e24a5a89e1051851f543fb8f6f7ba +size 892563552 diff --git a/bella-bartender-1b-heretic_Q5_1.gguf b/bella-bartender-1b-heretic_Q5_1.gguf new file mode 100644 index 0000000..d6b3cd1 --- /dev/null +++ b/bella-bartender-1b-heretic_Q5_1.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e05c564b91efbd38330190fd281ff3f0d3391ceed28872615a7c390687a7a2c1 +size 953380960 diff --git a/bella-bartender-1b-heretic_Q5_K_M.gguf b/bella-bartender-1b-heretic_Q5_K_M.gguf new file mode 100644 index 0000000..287d974 --- 
/dev/null +++ b/bella-bartender-1b-heretic_Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:053fcf9ef7337eedda4819dc0ebb08a2e9a1ec3672701c0851187b9eda910bff +size 911503456 diff --git a/bella-bartender-1b-heretic_Q5_K_S.gguf b/bella-bartender-1b-heretic_Q5_K_S.gguf new file mode 100644 index 0000000..0f0441b --- /dev/null +++ b/bella-bartender-1b-heretic_Q5_K_S.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:886f64e35361f66e418470f52038846f21cc81856ea7e86f9635ba6c976040f4 +size 892563552 diff --git a/bella-bartender-1b-heretic_Q6_K.gguf b/bella-bartender-1b-heretic_Q6_K.gguf new file mode 100644 index 0000000..7cbabe3 --- /dev/null +++ b/bella-bartender-1b-heretic_Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeadcacc638cbe5879e33c91583517259a98f56bd88c310598f212902f2a5ccc +size 1021800544 diff --git a/bella-bartender-1b-heretic_Q8_0.gguf b/bella-bartender-1b-heretic_Q8_0.gguf new file mode 100644 index 0000000..acd0115 --- /dev/null +++ b/bella-bartender-1b-heretic_Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3c99887348f705c495cdcc502b470714704dce913ebcbde8592938b9098bb6e +size 1321082976 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..1bad6a0 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,93 @@ +{{- bos_token }} +{%- if custom_tools is defined %} + {%- set tools = custom_tools %} +{%- endif %} +{%- if not tools_in_user_message is defined %} + {%- set tools_in_user_message = true %} +{%- endif %} +{%- if not date_string is defined %} + {%- if strftime_now is defined %} + {%- set date_string = strftime_now("%d %b %Y") %} + {%- else %} + {%- set date_string = "26 Jul 2024" %} + {%- endif %} +{%- endif %} +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{#- This block extracts the system message, so we can slot it into the right place. 
#} +{%- if messages[0]['role'] == 'system' %} + {%- set system_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} +{%- else %} + {%- set system_message = "" %} +{%- endif %} + +{#- System message #} +{{- "<|start_header_id|>system<|end_header_id|>\n\n" }} +{%- if tools is not none %} + {{- "Environment: ipython\n" }} +{%- endif %} +{{- "Cutting Knowledge Date: December 2023\n" }} +{{- "Today Date: " + date_string + "\n\n" }} +{%- if tools is not none and not tools_in_user_message %} + {{- "You have access to the following functions. To call a function, please respond with JSON for a function call." }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' }} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} +{%- endif %} +{{- system_message }} +{{- "<|eot_id|>" }} + +{#- Custom tools are passed in a user message with some extra guidance #} +{%- if tools_in_user_message and not tools is none %} + {#- Extract the first user message so we can plug it in here #} + {%- if messages | length != 0 %} + {%- set first_user_message = messages[0]['content']|trim %} + {%- set messages = messages[1:] %} + {%- else %} + {{- raise_exception("Cannot put tools in the first user message when there's no first user message!") }} +{%- endif %} + {{- '<|start_header_id|>user<|end_header_id|>\n\n' -}} + {{- "Given the following functions, please respond with a JSON for a function call " }} + {{- "with its proper arguments that best answers the given prompt.\n\n" }} + {{- 'Respond in the format {"name": function name, "parameters": dictionary of argument name and its value}.' 
}} + {{- "Do not use variables.\n\n" }} + {%- for t in tools %} + {{- t | tojson(indent=4) }} + {{- "\n\n" }} + {%- endfor %} + {{- first_user_message + "<|eot_id|>"}} +{%- endif %} + +{%- for message in messages %} + {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %} + {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' }} + {%- elif 'tool_calls' in message %} + {%- if not message.tool_calls|length == 1 %} + {{- raise_exception("This model only supports single tool-calls at once!") }} + {%- endif %} + {%- set tool_call = message.tool_calls[0].function %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' -}} + {{- '{"name": "' + tool_call.name + '", ' }} + {{- '"parameters": ' }} + {{- tool_call.arguments | tojson }} + {{- "}" }} + {{- "<|eot_id|>" }} + {%- elif message.role == "tool" or message.role == "ipython" %} + {{- "<|start_header_id|>ipython<|end_header_id|>\n\n" }} + {%- if message.content is mapping or message.content is iterable %} + {{- message.content | tojson }} + {%- else %} + {{- message.content }} + {%- endif %} + {{- "<|eot_id|>" }} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|start_header_id|>assistant<|end_header_id|>\n\n' }} +{%- endif %} diff --git a/config.json b/config.json new file mode 100644 index 0000000..14c75f2 --- /dev/null +++ b/config.json @@ -0,0 +1,34 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "dtype": "bfloat16", + "eos_token_id": 128009, + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 128004, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + 
"rope_parameters": { + "rope_theta": 500000.0, + "rope_type": "default" + }, + "tie_word_embeddings": true, + "transformers_version": "5.3.0", + "unsloth_fixed": true, + "unsloth_version": "2026.3.4", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..d3b9d2e --- /dev/null +++ b/generation_config.json @@ -0,0 +1,8 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "eos_token_id": 128009, + "pad_token_id": 128004, + "transformers_version": "5.3.0", + "use_cache": false +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..670d2cc --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc6b5e62bacc50524259978a454946975b1520d186352ef6c5f62b8f6ce6bce7 +size 2471645608 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..5b9e375 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a65c6c5f9764771aa485e6a1f5e63d7d9af8477fe0777148c17476ecb2e09a05 +size 17210099 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..9079f71 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,18 @@ +{ + "backend": "tokenizers", + "bos_token": "<|begin_of_text|>", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "from_slow": true, + "is_local": false, + "legacy": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|finetune_right_pad_id|>", + "padding_side": "left", + "tokenizer_class": "TokenizersBackend", + "unk_token": null +}