初始化项目，由ModelHub XC社区提供模型

Model: Polygl0t/Tucano2-qwen-0.5B-Think Source: Original Platform
2026-06-04 14:20:49 +08:00
commit d2e30c7b41
22 changed files with 465611 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,41 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 logo.png filter=lfs diff=lfs merge=lfs -text
 .plots/apo_gradient_norm.png filter=lfs diff=lfs merge=lfs -text
 .plots/apo_reward.png filter=lfs diff=lfs merge=lfs -text
 .plots/model_comparison.png filter=lfs diff=lfs merge=lfs -text
 .plots/sft_gradient_norm.png filter=lfs diff=lfs merge=lfs -text
 .plots/sft_loss.png filter=lfs diff=lfs merge=lfs -text
--- a/.plots/apo_gradient_norm.png
+++ b/.plots/apo_gradient_norm.png
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:0729b0ba79b79c3f9ae2becdfc986f9b9e7b6864e845292aaf78dc77e4535d93
 size 543980
--- a/.plots/apo_reward.png
+++ b/.plots/apo_reward.png
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:a7fa6778f74ca9cf434eda73696e2cbaa1144d42a6e61827364881ff55c36065
 size 286838
--- a/.plots/model_comparison.png
+++ b/.plots/model_comparison.png
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:caa80f85de1854e5ae25089c2bd1ab0c3991c59e14d5ee290d78710fb42a5cb4
 size 222035
--- a/.plots/sft_gradient_norm.png
+++ b/.plots/sft_gradient_norm.png
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:54f1bfe1d67832bff0989e695765bfb444633bb504aa4602c240e4a00763f3d2
 size 335390
--- a/.plots/sft_loss.png
+++ b/.plots/sft_loss.png
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:fb8c563e02344e1a42c0d66771ec2e6c07445b47470565ec3812aff525570d40
 size 387159
--- a/190
+++ b/190
@@ -0,0 +1,190 @@
 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/
   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
   1. Definitions.
      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.
      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.
      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.
      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.
      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.
      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.
      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).
      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.
      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."
      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.
   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.
   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.
   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:
      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and
      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and
      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and
      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.
      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.
   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.
   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.
   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.
   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.
   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.
   END OF TERMS AND CONDITIONS
   Copyright Nicholas Kluge Corrêa, Shiza Fatimah, Aniket Sen, and Sophia Falk 
   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at
       http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--- a/README.md
+++ b/README.md
@@ -0,0 +1,547 @@
 ---
 language:
  - pt
 license: apache-2.0
 library_name: transformers
 tags:
  - text-generation-inference
 datasets:
  - Polygl0t/gigaverbo-v2-sft
  - Polygl0t/gigaverbo-v2-preferences
 metrics:
  - perplexity
 pipeline_tag: text-generation
 widget:
  - text: "<|im_start|>user\nQual é a capital de Portugal?<|im_end|>\n<|im_start|>assistant\n"
    example_title: Exemplo
  - text: "<|im_start|>user\nEscreva um poema sobre a floresta amazônica.<|im_end|>\n<|im_start|>assistant\n"
    example_title: Exemplo
  - text: "<|im_start|>user\nListe três benefícios da energia solar.<|im_end|>\n<|im_start|>assistant\n"
    example_title: Exemplo
 inference:
  parameters:
    repetition_penalty: 1.2
    temperature: 0.1
    top_k: 50
    top_p: 1.0
    max_new_tokens: 150
 co2_eq_emissions:
  emissions: 1230
  source: CodeCarbon
  training_type: post-training
  geographical_location: Germany
  hardware_used: NVIDIA A100-SXM4-80GB
 model-index:
  - name: Tucano2-qwen-0.5B-Think
    results:
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: ARC Challenge
          type: Polygl0t/ARC-poly
          split: test
          args:
            num_few_shot: 5
        metrics:
          - type: acc_norm
            value: 32.74
            name: Acc-norm
        source:
          url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
          name: arc_challenge_poly_pt
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: MMLU
          type: Polygl0t/MMLU-poly
          split: test
          args:
            num_few_shot: 5
        metrics:
          - type: acc
            value: 36.08
            name: Acc
        source:
          url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
          name: mmlu_poly_pt
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: BELEBELE
          type: facebook/belebele
          split: test
          args:
            num_few_shot: 5
        metrics:
          - type: acc_norm
            value: 36.11
            name: Acc-norm
        source:
          url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
          name: belebele_por_Latn
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: BLUEX
          type: eduagarcia-temp/BLUEX_without_images
          split: train
          args:
            num_few_shot: 3
        metrics:
          - type: acc
            value: 34.49
            name: Acc
        source:
          url: https://github.com/eduagarcia/lm-evaluation-harness-pt
          name: bluex
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: ENEM Challenge
          type: eduagarcia/enem_challenge
          split: train
          args:
            num_few_shot: 3
        metrics:
          - type: acc
            value: 31.98
            name: Acc
        source:
          url: https://github.com/eduagarcia/lm-evaluation-harness-pt
          name: enem_challenge
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: OAB Exams
          type: eduagarcia/oab_exams
          split: train
          args:
            num_few_shot: 3
        metrics:
          - type: acc
            value: 27.02
            name: Acc
        source:
          url: https://github.com/eduagarcia/lm-evaluation-harness-pt
          name: oab_exams
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: IFEval
          type: Polygl0t/IFEval-PT
          split: train
          args:
            num_few_shot: 0
        metrics:
          - type: ifeval_pt_prompt_level_loose_acc
            value: 27.67
            name: Acc-loose
        source:
          url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
          name: ifeval_pt
      - task:
          type: text-generation
          name: Text Generation
        dataset:
          name: GSM8K
          type: Polygl0t/gsm8k-pt
          split: test
          args:
            num_few_shot: 0
        metrics:
          - type: flexible-extract
            value: 14.61
            name: Acc-flex
        source:
          url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
          name: gsm8k_pt
 base_model: Polygl0t/Tucano2-qwen-0.5B-Base
 ---
 # Tucano2-qwen-0.5B-Think
 <img src="./logo.png" alt="An illustration of a Tucano bird showing vibrant colors like yellow, orange, blue, green, and black." height="200">
 ## Model Summary
 **[Tucano2-qwen-0.5B-Think](https://huggingface.co/Polygl0t/Tucano2-qwen-0.5B-Think)** is an instruction-tuned Portuguese language model built on top of **Tucano2-qwen-0.5B-Base**. It has been trained using a combination of one round of supervised fine-tuning (SFT) and one round of Anchored Preference Optimization (APO).
 Tucano2-qwen-0.5B-Think is a reasoning model, which means it has been fine-tuned to generate CoT-style (Chain-of-Thought) traces in its responses. These reasoning traces are always encapsulated within the special tokens `<think>` and `</think>`.
 **All datasets, source code, and training recipes used to develop the Tucano2 series are fully open and reproducible.**
 ## Details
 - **Architecture:** a Transformer-based model ([`qwen3`](https://huggingface.co/docs/transformers/main/en/model_doc/qwen3))
 - **Size:** 490,799,104 parameters
 - **Context length:** 4,096 tokens
 - **Dataset(s):**
  - [Polygl0t/gigaverbo-v2-sft](https://huggingface.co/datasets/Polygl0t/gigaverbo-v2-sft)
  - [Polygl0t/gigaverbo-v2-preferences](https://huggingface.co/datasets/Polygl0t/gigaverbo-v2-preferences)
 - **Training time:** ~1.5 hours
 - **Emissions:** 1.23 kg CO2eq (Germany)
 - **Total energy consumption:** 2.66 kWh
 The [source code](https://github.com/Polygl0t/llm-foundry) used to train this model is available on GitHub. The full training configuration and the training logs are available in the following files:
 - Single stage Supervised Fine-Tuning (linear warmup with cosine decay): [training_config_sft.yaml](training_config_sft.yaml)
 - Single stage Anchored Preference Optimization (linear warmup with cosine decay): [training_config_apo.yaml](training_config_apo.yaml)
 - Training Logs (loss, lr, rewards, etc.): [train_logs_apo.parquet](train_logs_apo.parquet), [train_logs_sft.parquet](train_logs_sft.parquet)
 <details>
 <summary><b>SFT Loss Curve</b></summary>
 ![SFT Loss Curve](./.plots/sft_loss.png)
 </details>
 <details>
 <summary><b>APO Rewards</b></summary>
 ![APO Rewards](./.plots/apo_reward.png)
 </details>
 ## Intended Uses
 The primary intended use of Tucano2-qwen-0.5B-Think is to serve as a foundation for research and development involving Portuguese language modeling. You may also fine-tune and adapt Tucano2-qwen-0.5B-Think for deployment if your use follows the Apache 2.0 license. If you decide to use Tucano2-qwen-0.5B-Think as a basis for your fine-tuned model, please conduct your own risk and bias assessment.
 ## Basic usage
 ```python
 from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
 import torch
 # Load model and tokenizer
 model_id = "Polygl0t/Tucano2-qwen-0.5B-Think"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto"
 )
 # Configure generation parameters
 generation_config = GenerationConfig(
    do_sample=True,
    temperature=0.1,
    top_k=50,
    top_p=1.0,
    repetition_penalty=1.2,
    max_new_tokens=150,
    pad_token_id=tokenizer.eos_token_id,
 )
 # Prepare chat messages
 messages = [
    {"role": "user", "content": "Qual é a capital de Angola?"}
 ]
 # Apply chat template and generate
 prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
 inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 with torch.no_grad():
    outputs = model.generate(**inputs, generation_config=generation_config)
 # Decode and print response
 full_output = tokenizer.decode(
    outputs[0][len(inputs.input_ids[0]):],
    skip_special_tokens=True
 ).strip()
 # Extract <think>...</think> content
 think_content = None
 final_response = full_output
 if "<think>" in full_output and "</think>" in full_output:
    start = full_output.find("<think>") + len("<think>")
    end = full_output.find("</think>")
    think_content = full_output[start:end].strip()
    # Remove think block from final response
    final_response = (
        full_output[:full_output.find("<think>")] +
        full_output[end + len("</think>"):]
    ).strip()
 if think_content:
    print("🧠 Thinking:\n")
    print(think_content)
    print("\n" + "="*50 + "\n")
 print("🤖 Answer:\n")
 print(final_response)
 ```
 ## Limitations
 Like almost all other language models trained on large text datasets scraped from the web, Tucano2-qwen-0.5B-Think exhibits behavior that prevents it from being an out-of-the-box solution for many real-world applications, especially those requiring factual, reliable, and nontoxic text generation. Tucano2-qwen-0.5B-Think is subject to the following limitations:
 - **Hallucinations:** Tucano2-qwen-0.5B-Think can produce content that may be mistaken for fact but is misleading or entirely false, i.e., hallucinations.
 - **Biases and Toxicity:** Tucano2-qwen-0.5B-Think inherits the social and historical stereotypes from the data used to train it. Given these biases, the model can produce toxic content, i.e., harmful, offensive, or detrimental to individuals, groups, or communities.
 - **Language Limitations:** Tucano2-qwen-0.5B-Think is primarily designed to interact in Portuguese. Other languages might challenge its comprehension, leading to potential misinterpretations or errors in its responses.
 - **Repetition and Verbosity:** Tucano2-qwen-0.5B-Think may get stuck in repetition loops (especially if the repetition penalty during generation is set too low) or produce verbose responses unrelated to the prompt it was given.
 Hence, even though Tucano2-qwen-0.5B-Think is released under a permissive license, we urge users to perform their own risk analysis before using it in real-world applications.
 ## Evaluations
 The table below compares the Tucano2 (Think variant) series against other reasoning models of similar size. We divide our evaluations into two sets:
 - **Knowledge & Reasoning:** ARC-Challenge, ENEM, BLUEX, OAB Exams, BELEBELE, MMLU, GSM8K-PT
 - **Instruction Following:** IFEval-PT
 The NPM (Normalized Performance Metric) provides a balanced view of model performance across tasks, accounting for each task's inherent difficulty by normalizing its evaluation score relative to its random baseline.
 We do not include coding benchmarks in this table because the Think models were not trained on coding data during post-training and thus perform poorly on them. For coding skills, we recommend using the Instruct models instead, which were trained with coding data and perform much better on coding benchmarks.
 |                             | Total Avg. | Knowledge & Reasoning (NPM) | Instruction Following |
 | --------------------------- | ---------- | --------------------------- | --------------------- |
 | **Tucano2-qwen-3.7B-Think** | 51.27      | 54.07                       | 31.67                 |
 | SmolLM3-3B                  | 48.58      | 46.28                       | 64.67                 |
 | Qwen3-4B                    | 46.35      | 40.97                       | 84                    |
 | Qwen3-1.7B                  | 36.54      | 32                          | 68.33                 |
 | **Tucano2-qwen-1.5B-Think** | 27.54      | 26.67                       | 33.67                 |
 | Qwen3-0.6B                  | 24.11      | 19.22                       | 58.33                 |
 | **Tucano2-qwen-0.5B-Think** | 14.41      | 12.52                       | 27.67                 |
 <details>
 <summary><b>Evaluation Suite</b></summary>
 | **Benchmark**             | **n-shot** | **Type**      | **Baseline** | **Metric**               |
 | ------------------------- | ---------- | ------------- | ------------ | ------------------------ |
 | **Knowledge & Reasoning** |            |               |              |                          |
 | ARC-Challenge             | 5-shot     | MC-Q&A        | 25           | `acc_norm`               |
 | ENEM                      | 3-shot     | MC-Q&A        | 20           | `acc`                    |
 | BLUEX                     | 3-shot     | MC-Q&A        | 22.5         | `acc`                    |
 | OAB Exams                 | 3-shot     | MC-Q&A        | 25           | `acc`                    |
 | BELEBELE                  | 5-shot     | MC-Q&A        | 25           | `acc_norm`               |
 | MMLU                      | 5-shot     | MC-Q&A        | 25           | `acc`                    |
 | GSM8K-PT                  | 0-shot     | Math Problems | 0            | `flexible-extract`       |
 | **Instruction Following** |            |               |              |                          |
 | IFEval-PT                 | 0-shot     | Instruction   | 0            | `prompt_level_loose_acc` |
 </details>
 <details>
 <summary><b>Individual Benchmarks</b></summary>
 |                             | BLUEX | ENEM  | OAB   | ARC Challenge | BELEBELE | MMLU  | IFEval-PT | GSM8K-PT |
 | --------------------------- | ----- | ----- | ----- | ------------- | -------- | ----- | --------- | -------- |
 | **Tucano2-qwen-3.7B-Think** | 63    | 71.52 | 53.76 | 55.38         | 83.56    | 61.18 | 31.67     | 56.7     |
 | SmolLM3-3B                  | 48.82 | 60.6  | 42.19 | 52.56         | 77.78    | 52.82 | 64.67     | 72.15    |
 | Qwen3-4B                    | 78.3  | 85.23 | 47.97 | 39.66         | 23       | 31.53 | 84        | 77.02    |
 | Qwen3-1.7B                  | 68.29 | 72.5  | 39.32 | 35.64         | 22.89    | 22.85 | 68.33     | 71.69    |
 | **Tucano2-qwen-1.5B-Think** | 39.22 | 39.89 | 34.26 | 42.82         | 67.67    | 43.3  | 33.67     | 22.83    |
 | Qwen3-0.6B                  | 43.53 | 48.71 | 34.21 | 29.91         | 22.89    | 22.85 | 58.33     | 58.37    |
 | **Tucano2-qwen-0.5B-Think** | 34.49 | 31.98 | 27.02 | 32.74         | 36.11    | 36.08 | 27.67     | 14.61    |
 </details>
 <details>
 <summary><b>Inference Samples on Various Tasks</b></summary>
 ### Cooking
 **Prompt:**
    <|im_start|>user
    Você pode me dar uma receita simples de Bolo de Brigadeiro?<|im_end|>
    <|im_start|>assistant
 **Response:**
    <think>
    O usuário está me pedindo uma receita simples de bolo de brigadeiro. Preciso garantir que a resposta seja útil e fácil de seguir, então vou começar explicando brevemente o que é um bolo de brigadeiro para contextualizar sua solicitação. Agora, preciso listar os ingredientes necessários em quantidades moderadas para evitar sobrecarga na lista inicial. Vou considerar itens básicos como farinha, açúcar, chocolate amargo picado, leite condensor, essência de baunilha, canela moída, coco ralado e raspas de laranja. É importante mencionar as precauções iniciais ao lidar com alguns desses ingredientes, especialmente quando se trata do chocolate derretido durante o forno. Devo também sugerir algumas dicas adicionais para tornar a experiência ainda mais agradável, como misturar bem todos os ingredientes antes de colocar no forno e decorá-lo depois. Finalizo lembrando novamente que tudo deve estar pronto dentro das primeiras cinco minutos após assar.
    </think>
    Claro! Aqui vai uma receitinha básica de **Bolinho de Brigadeirão**:
    ### Ingredientes:
    - 2 ovos grandes
    - 1 xícara (chá) de açúcar mascavo ou mel (180g)
    - 1/2 colher (sopa) de cacau em pó sem adoçante (30% ativo butylated buttercreme)
    - 3/4 xícara (cha) de leite integral (250ml)
    - 1/4 xícara (cha) de óleo vegetal neutro (20g)
    - 3 colheres (sopa) de farinha de trigo fina (200g)
    - Canela moida à gosto
    - Coco ralado fresco ou seco (1/2 colher (sobremesa))
    - Raspa(s) de limão siciliano (2 unidades)
    ### Modo de Preparo:
    1. Em uma panela média, misture todos os ingredientes secos (farinha, cacaue, etc.) até formar uma massa homogênea. Reserve.
    2. No liquidificador, bata juntos os ovos, o açúcar/mel, o leite, o óleo, a canela e o coco ralado. Adicione gradualmente o restante dos ingredientes secos enquanto continua batendo até obter uma mistura lisa e aerada.
    3. Transfira a mistura para uma forma redonda untada e enfarinhada. Leve ao forno pré-aquecido a 180°C (aproximadamente 350°F). Asse por cerca de 25-30 min., virando metade do caminho para cozinhar uniformemente.
    4. Retire do forno e deixe esfriar ligeiramente antes de cortar e servir. Decore com as raspas de limão caso goste!
    Espero que você aproveite esse delicioso Bolinho de Brigadeirão! Qualquer dúvida adicional, estou aqui pra ajudar 😊.<|im_end|>
 ---
 ### Math
 **Prompt:**
    <|im_start|>user
    Como eu posso resolver o seguinte problema: 2x + 3 = 11?<|im_end|>
    <|im_start|>assistant
 **Response:**
    <think>
    O usuário está me perguntando como resolver a equação linear 2x + 3 = 11. Preciso fornecer uma solução passo a passo para garantir que ele entenda cada etapa do raciocínio. Primeiro, vou identificar os elementos principais da equação: x é desconhecido e precisa ser isolado; ambos lados devem permanecer iguais (lei de conservação); há um termo constante no lado esquerdo (3). Agora preciso isolar o termo com 'x'. Para fazer isso, subtrairo 3 de ambos os lados porque estou fazendo a mesma operação em ambas as partes. Isso elimina o número 3, deixando apenas 'x' sozinho na esquerda. Deixe-me realizar esta subtração agora: 2x - 3 - 3 = 11 - 3. Simplificando, tenho 2x = 8. Com 'x' isolado, minha próxima tarefa será dividir ambos os lados por 2, já que estamos tentando encontrar o valor original de 'x', não sua derivada ou multiplicador. Realizando essa divisão, obtenho x = 4. Finalmente, formulo a resposta explicando ao usuário que 'x = 4' satisfaz a equação inicial 2x + 3 = 11, pois quando substituímos 4, obtemos 2*4 + 3 = 8, que simplifica para 8 + 3 = 11, confirmando assim a correção do cálculo.
    </think>
    Claro! Vamos resolver a equação \(2x + 3 = 11\) juntos. A ideia principal aqui é isolarmos a variável \(x\), geralmente movendo todos os outros termos para o outro lado da igualdade. Aqui estão os passos:
    1. **Subtraia 3 de ambos os lados:**
       - Começamos com a equação: \[2x + 3 = 11\]
       - Subtraindo 3 de ambos os lados: \[2(x + 3) = 10\]
    2. **Divida ambos os lados por 2:**
       - Agora temos: \[(x + 3) = 5\]
       - Dividindo ambos os lados por 2: \[x = 5 - 3\]
    3. **Simplifique:**
       - Chegamos à conclusão final: \[x = 2\]
    Portanto, a solução para a equação \(2x + 3 = 11\) é \(x = 2\).
    Se você tiver mais perguntas ou precisar de ajuda com outra equação, estou aqui para ajudar!<|im_end|>
 ---
 ### Rewriting
 **Prompt:**
    <|im_start|>user
    Remova todas as informações irrelevantes da frase a seguir.
    "O novo restaurante que abriu no centro da cidade, de propriedade do primo de Jorge, que costumava ser chef de um restaurante em Ipanema, serve uma variedade de cozinhas de todo o mundo."<|im_end|>
    <|im_start|>assistant
 **Response:**
    <think>
    O usuário está me pedindo para remover quaisquer informações irrelevantes ou desnecessárias da seguinte frase: 'O novo restaurante que abriu no centro da cidade, de propriedade do primo de Jorge, que costumava ser chef de um restaurante em Ipanema, serve uma variedade de cozinhas de todo o mundo.' Primeiro, vou ler cuidadosamente toda a estrutura gramatical e identificar os principais elementos da sentença. A frase contém vários componentes importantes: sujeito ('novo restaurante'), localização ('centro da cidade') e características específicas (propriedade do primo de Jorge). Agora preciso considerar quais partes podem ser removidas sem alterar significativamente o sentido geral da mensagem. Devo evitar mencionar pessoas ausentes, lugares específicos não utilizados além das indicações básicas, nem detalhes técnicos excessivos como nomes próprios ou termos complicados. Vou começar pela eliminação dos pronomes pessoais e adjetivos redundantemente repetidos. Posso simplificar 'que costumava' usando apenas 'costumava', mantendo assim a essência original. Também posso substituir 'chef' por algo mais genérico relacionado ao trabalho culinário, como 'cozinheiro'. Além disso, eliminar algumas palavras extras pode deixar a frase mais concisa. Finalmente, revisarei tudo novamente para garantir que nenhum elemento importante tenha sido omitido durante esse processo de limpeza. Após essas modificações, obtenho esta versão refinada da frase: 'Um novo estabelecimento gastronômico inaugurado na área central da cidade...serve diversos estilos de cozinha internacionalmente reconhecidos.' Essa reformulação preservou todos os aspectos relevantes enquanto reduziu consideravelmente seu comprimento inicial.
    </think>
    Claro! Aqui está a frase após a remoção das informações irrelevantes:
    > Um novo estabelecimento gastronômico inaugurado na área central da cidade...serve diversos estilos de cozinha internacionalmente reconhecidos.
    Notei alguns ajustes menores aqui:
    - Substituí "primo de Jorge" pelo nome próprio equivalente ("Jorge") porque parece haver algum erro tipográfico nesse caso específico. No entanto, isso foi feito apenas para corrigir possíveis inconsistências; eu mantenho a grafia exata conforme apareceu originalmente nos seus dados.
    - Ajustei ligeiramente outros termos para maior clareza, mas manteve-se fiel aos originais. Por exemplo, usei "estabelecimento gastronômico" em vez de "restaurante".<|im_end|>
 ---
 ### Science Explanation
 **Prompt:**
    <|im_start|>user
    Explique a teoria da relatividade de forma simples (ELI5).<|im_end|>
    <|im_start|>assistant
 **Response:**
    <think>
    O usuário está me pedindo para explicar a Teoria da Relatividade em ELI5. Preciso garantir que minha resposta seja clara e concisa, mantendo o foco nos principais pontos. Primeiro, vou lembrar os elementos básicos dessa teoria: especial e geral. Agora, preciso estruturar meu raciocínio de maneira lógica e fácil de entender. Começo pela introdução do paradoxo dos gêmeos, pois ele ilustra bem as ideias centrais da teoria. Depois, passo às duas versões específicas da teoria — especial e geral —, destacando suas diferenças cruciais. Finalizo com exemplos práticos para ilustrar melhor cada ponto. Vou revisar rapidamente todos esses tópicos antes de formular a resposta completa.
    </think>
    Claro! A **Teoria da Relatividade** é um conjunto de princípios formulados por Albert Einstein no início do século XX. Ela divide-se basicamente entre dois aspectos principais:
    1. **A Teoria Especial da Relatividade (1905)** - Esta parte trata das leis físicas invariantes sob transformações de Lorentz, ou seja, como essas leis funcionam independentemente do movimento relativo de observadores diferentes. Um exemplo famoso disso é o paradoxo dos gêmeos, onde um irmão viaja pelo espaço enquanto seu amigo fica parado aqui na Terra. No final, quem volta mais jovem porque correu menos tempo durante sua viagem espacial.
    2. **A Generalização da Teoria Especial da Relatividade (1915)** - Aqui, Einstein expande ainda mais seus conceitos anteriores, introduzindo a ideia de gravidade através de campos tensoriais, não apenas força gravitacional convencional. Isso levou à formulação da Equação de Friedewald, embora ela tenha sido inicialmente proposta separadamente por Hendrik Lorentz.
    Em termos simplificados, podemos dizer que a Teoria da Relatividade tenta unificar três grandes questões fundamentais sobre o universo:
    - Como medimos distâncias e tempos dependendo do nosso estado de movimento?
    - Existe uma relação universal entre massa e energia descrita pela famosa equação \( E = mc^2 \)?
    - Gravidade realmente afeta tudo ao redor dela? Ou seria algo diferente?
    Essas perguntas foram respondidas parcialmente pelas teorias modernas, mas continuam sendo objeto intenso de pesquisa científica até hoje. Espero que isso ajude você a entender alguns pontos importantes! Se tiver alguma outra dúvida específica, estou aqui pra ajudar.<|im_end|>
 ---
 ### History
 **Prompt:**
    <|im_start|>user
    Liste os principais eventos envolvendo a Revolução Farroupilha.<|im_end|>
    <|im_start|>assistant
 **Response:**
    <think>
    O usuário está me perguntando para listar os principais eventos da Revolução Farroupilha, também conhecida como Guerra dos Dezos. Preciso garantir que minha resposta seja precisa e abrangente, cobrindo todos os momentos cruciais dessa importante fase da história do Rio Grande do Sul. Vou começar organizando meus pensamentos em categorias lógicas: início da guerra (1845), promulgação de leis (1844), batalhas significativas (1836-1845) e consequências pós-guerra (1883). Agora, vou estruturar cada categoria com exemplos específicos de eventos importantes dentro delas. Para iniciar, o evento inicial é fundamental: as tropas farroupilhas lideradas por Bento Gonçalves desembarcam na Laguna em 1849. Isso marca oficialmente o começo da revolta. Em seguida, vem a assinatura do Tratado de Ponche Verde em 1845, onde os rebeldes aceitam termos favoráveis aos interesses sulinos. Outro momento crucial são as Leis Provinciais nºs 1 e 2 de 1844, que estabelecem autonomia provincial e controle local sobre questões locais, respectivamente. A Batalha de Porongos continua sendo vital, pois mostra heroísmo militar e simboliza unidade entre diferentes facções. Após esse período turbulento, há decisões políticas críticas, especialmente aquelas relacionadas à formação das Províncias Unidas do Rio Grande do Sul e ao apoio imperial através do Decreto Imperial nº 5.246. As Guerras Civis subsequentes continuam até sua derrota definitiva em 1883. Finalmente, preciso mencionar brevemente algumas repercussões duradouras, incluindo influências culturais e econômicas no estado atual. Com esses elementos claros, posso agora redigir uma lista completa e coesa.
    </think>
    Claro! Aqui estão alguns dos principais eventos relacionados à Revolução Farroupilha (Guerra dos Debates):
    1. **Início da Guerra (1845-1845)**:
       - As forças farroupilhas iniciaram seu levante após um incidente naval perto de São Gabriel, RS. Inicialmente liderados pelo general David Canabarro, eles rapidamente ganharam apoio popular nas regiões litorâneas.
    2. **Promulgações Legislativas (1844)**:
       - Foi promulgada a Lei Provincial número 1, estabelecendo eleições livres nos municípios;
       - Seguidamente, foi sancionada a Lei Provincial número 2, declarando todo território gaúcho livre de impostos provinciais.
    3. **Batalhas Significativas (1836–1845)**:
       - **Laguna**: Uma vitória decisiva das tropas imperiais sob comando do marechal Carlos Frederico Lenzi, marcando formalmente o fim imediato da rebelião.
       - **Porongos**: Um massacre sangrento durante a rendição final das forças farrapas, protagonizado pela escrava Zacimba Gala e seus companheiros, simbolizando união nacional mesmo diante da adversidade extrema.
    4. **Consequências Pós-Guerra (1868–1883)**:
       - Os remanescentes insurretos fundaram duas novas províncias republicanas: República Juliana (1918) e Província Independente do Rio Grande do Sul (1920).
       - O governo central retomou parcialmente suas possessões sulinas mediante negociação diplomática culminando no Tratado de Ponche Verde em 1845.
    Esses pontos representam apenas parte significativa desses acontecimentos históricos complexos. Espero que isso ajude você a entender melhor a contextura desta importante luta regional brasileira!<|im_end|>
 ---
 </details>
 Below, we compare the performance of Tucano2-qwen-0.5B-Think with Qwen3-0.6B, a strong baseline in the 0.5B parameter range. The percentages represent the absolute difference in performance between the two models on each benchmark. All other plots can be found in the [.plots](https://huggingface.co/Polygl0t/Tucano2-qwen-0.5B-Think/tree/main/.plots/) folder.
 **Tucano2-qwen-0.5B-Think vs Qwen3-0.6B**
 ![Performance Comparison](./.plots/model_comparison.png)
 ## Cite as 🤗
 ```latex
@misc{correa2026tucano2cool,
      title={{Tucano 2 Cool: Better Open Source LLMs for Portuguese}}, 
      author={Nicholas Kluge Corr{\^e}a and Aniket Sen and Shiza Fatimah and Sophia Falk and Lennard Landgraf and Julia Kastner and Lucie Flek},
      year={2026},
      eprint={2603.03543},
      archivePrefix={arXiv},
      primaryClass={cs.CL},
      url={https://arxiv.org/abs/2603.03543}, 
 }
 ```
 ## Acknowledgments
 Polyglot is a project funded by the Federal Ministry of Education and Research (BMBF) and the Ministry of Culture and Science of the State of North Rhine-Westphalia (MWK) as part of TRA Sustainable Futures (University of Bonn) and the Excellence Strategy of the federal and state governments.
 We also gratefully acknowledge the granted access to the [Marvin cluster](https://www.hpc.uni-bonn.de/en/systems/marvin) hosted by [University of Bonn](https://www.uni-bonn.de/en) along with the support provided by its High Performance Computing & Analytics Lab.
 ## License
 Tucano2-qwen-0.5B-Think is licensed under the Apache License, Version 2.0. For more details, see the [LICENSE](LICENSE) file.
--- a/chat_template.jinja
+++ b/chat_template.jinja
@@ -0,0 +1,114 @@
 {#- System preamble. With tools: emit a single system turn holding the optional leading system message followed by the tool-calling instructions. Without tools: emit the leading system message (if any) as its own turn #}
 {%- if tools %}
    {{- '<|im_start|>system\n' }}
    {#- Prepend the caller's system message when the first message has role 'system' #}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\n\n' }}
    {%- endif %}
    {#- Tool calling instructions (Portuguese), then one JSON-serialized tool signature per line inside <tools></tools> #}
    {{- "# Tools / Ferramentas\n\nVocê pode chamar uma ou mais funções para auxiliar na consulta do usuário.\n\nVocê recebe assinaturas de funções dentro de tags XML <tools></tools>:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nPara cada chamada de função, retorne um objeto json com o nome da função e os argumentos dentro das tags XML <tool_call></tool_call>:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
 {%- else %}
    {#- No tools: emit the leading system message as a complete turn; nothing is emitted when there is none #}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
    {%- endif %}
 {%- endif %}
 {#- Find the last real user query: scan the messages in reverse for the first user message whose content is a string that is not wrapped in <tool_response>...</tool_response>. Its index is stored in ns.last_query_index, which stays at the final message index when no such message exists #}
 {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
 {%- for message in messages[::-1] %}
    {%- set index = (messages|length - 1) - loop.index0 %}
    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
        {%- set ns.multi_step_tool = false %}
        {%- set ns.last_query_index = index %}
    {%- endif %}
 {%- endfor %}
 {#- Render every message as a ChatML turn; the branch taken depends on message.role #}
 {%- for message in messages %}
    {#- Normalize content: string content is kept as-is, any non-string content is replaced by an empty string #}
    {%- if message.content is string %}
        {%- set content = message.content %}
    {%- else %}
        {%- set content = '' %}
    {%- endif %}
    {#- User messages and non-first system messages are emitted verbatim under their own role (a first system message is handled before this loop) #}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
    {#- Assistant messages: separate reasoning from the visible answer, then emit both plus any tool calls #}
    {%- elif message.role == "assistant" %}
        {#- Prefer an explicit string reasoning_content field over anything embedded in content #}
        {%- set reasoning_content = '' %}
        {%- if message.reasoning_content is string %}
            {%- set reasoning_content = message.reasoning_content %}
        {%- else %}
            {#- Otherwise split content on </think>: text after the last <think> and before the first </think> becomes the reasoning, text after the last </think> becomes the content #}
            {%- if '</think>' in content %}
                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
            {%- endif %}
        {%- endif %}
        {#- NOTE(review): the header below has no trailing '-' and the generation tag has no leading '-', so the line break between them is not removed by whitespace control; the same holds for the end-of-turn marker and the endgeneration tag. Presumably the renderer's trim_blocks/lstrip_blocks settings reduce this to exactly one newline; confirm before reformatting #}
        {{- '<|im_start|>' + message.role }}
        {% generation %}
        {#- Wrap reasoning in <think> tags only for assistant turns after the last real user query: always for the final message (even when the reasoning is empty), and for earlier ones only when reasoning is non-empty. All other assistant turns emit content alone #}
        {%- if loop.index0 > ns.last_query_index %}
            {%- if loop.last or (not loop.last and reasoning_content) %}
                {{- '<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
            {%- else %}
                {{- content }}
            {%- endif %}
        {%- else %}
            {{- content }}
        {%- endif %}
        {#- Append each tool call as a <tool_call> JSON block, separated from preceding output by a newline #}
        {%- if message.tool_calls %}
            {%- for tool_call in message.tool_calls %}
                {%- if (loop.first and content) or (not loop.first) %}
                    {{- '\n' }}
                {%- endif %}
                {#- Accept both the nested form (tool_call.function) and the flat form; string arguments are emitted verbatim, anything else is serialized with tojson #}
                {%- if tool_call.function %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\n{"name": "' }}
                {{- tool_call.name }}
                {{- '", "arguments": ' }}
                {%- if tool_call.arguments is string %}
                    {{- tool_call.arguments }}
                {%- else %}
                    {{- tool_call.arguments | tojson }}
                {%- endif %}
                {{- '}\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>' }}
        {% endgeneration %}
    {#- Tool results are rendered inside a user turn, each wrapped in <tool_response> tags #}
    {%- elif message.role == "tool" %}
        {#- Consecutive tool messages share one user turn: open it on the first of a run, close it after the last #}
        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\n<tool_response>\n' }}
        {{- content }}
        {{- '\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\n' }}
        {%- endif %}
    {%- endif %}
 {%- endfor %}
 {#- When add_generation_prompt is set, open a new assistant turn (header and newline only) for the model to complete #}
 {%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
 {%- endif %}
--- a/config.json
+++ b/config.json
@@ -0,0 +1,61 @@
 {
  "architectures": [
    "Qwen3ForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "dtype": "bfloat16",
  "eos_token_id": 2,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_types": [
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention",
    "full_attention"
  ],
  "max_position_embeddings": 4096,
  "max_window_layers": 28,
  "model_type": "qwen3",
  "num_attention_heads": 16,
  "num_hidden_layers": 28,
  "num_key_value_heads": 8,
  "pad_token_id": 49109,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000,
  "sliding_window": null,
  "tie_word_embeddings": true,
  "transformers_version": "4.57.3",
  "use_cache": false,
  "use_sliding_window": false,
  "vocab_size": 49152
 }
--- a/evals.yaml
+++ b/evals.yaml
@@ -0,0 +1,207 @@
 evaluations:
  arc_challenge_poly_pt_acc: 0.30427350427350425
  arc_challenge_poly_pt_acc_norm: 0.32735042735042735
  arc_challenge_poly_pt_acc_norm_stderr: 0.013724408490743929
  arc_challenge_poly_pt_acc_stderr: 0.013456870841978025
  arc_challenge_poly_pt_alias: arc_challenge_poly_pt
  assin2_rte_acc,all: 0.5
  assin2_rte_acc_stderr,all: 0.007138073526203373
  assin2_rte_alias: assin2_rte
  assin2_rte_f1_macro,all: 0.3333333333333333
  assin2_rte_f1_macro_stderr,all: 0.003173025139437921
  assin2_sts_alias: assin2_sts
  assin2_sts_mse,all: 3.059456699346405
  assin2_sts_mse_stderr,all: N/A
  assin2_sts_pearson,all: 0.0276213074062244
  assin2_sts_pearson_stderr,all: 0.011665375504884477
  assin_entailment_acc: 0.7335
  assin_entailment_acc_stderr: 0.006991541883910775
  assin_entailment_alias: assin_entailment
  assin_paraphrase_acc: 0.71575
  assin_paraphrase_acc_stderr: 0.0071327206100355
  assin_paraphrase_alias: assin_paraphrase
  belebele_por_Latn_acc: 0.3611111111111111
  belebele_por_Latn_acc_norm: 0.3611111111111111
  belebele_por_Latn_acc_norm_stderr: 0.016019658270537297
  belebele_por_Latn_acc_stderr: 0.016019658270537297
  belebele_por_Latn_alias: belebele_por_Latn
  bluex_acc,all: 0.34492350486787204
  bluex_acc,exam_id__UNICAMP_2018: 0.4074074074074074
  bluex_acc,exam_id__UNICAMP_2019: 0.28
  bluex_acc,exam_id__UNICAMP_2020: 0.36363636363636365
  bluex_acc,exam_id__UNICAMP_2021_1: 0.2391304347826087
  bluex_acc,exam_id__UNICAMP_2021_2: 0.23529411764705882
  bluex_acc,exam_id__UNICAMP_2022: 0.2564102564102564
  bluex_acc,exam_id__UNICAMP_2023: 0.5581395348837209
  bluex_acc,exam_id__UNICAMP_2024: 0.4
  bluex_acc,exam_id__USP_2018: 0.3148148148148148
  bluex_acc,exam_id__USP_2019: 0.3
  bluex_acc,exam_id__USP_2020: 0.30357142857142855
  bluex_acc,exam_id__USP_2021: 0.36538461538461536
  bluex_acc,exam_id__USP_2022: 0.3673469387755102
  bluex_acc,exam_id__USP_2023: 0.38636363636363635
  bluex_acc,exam_id__USP_2024: 0.4146341463414634
  bluex_acc_stderr,all: 0.010242775047627772
  bluex_acc_stderr,exam_id__UNICAMP_2018: 0.038534741152469736
  bluex_acc_stderr,exam_id__UNICAMP_2019: 0.0367961057296462
  bluex_acc_stderr,exam_id__UNICAMP_2020: 0.037368794971719924
  bluex_acc_stderr,exam_id__UNICAMP_2021_1: 0.036222183210060134
  bluex_acc_stderr,exam_id__UNICAMP_2021_2: 0.034079376824662555
  bluex_acc_stderr,exam_id__UNICAMP_2022: 0.04027891922030821
  bluex_acc_stderr,exam_id__UNICAMP_2023: 0.04379564728920204
  bluex_acc_stderr,exam_id__UNICAMP_2024: 0.04209744896628822
  bluex_acc_stderr,exam_id__USP_2018: 0.03647519527201769
  bluex_acc_stderr,exam_id__USP_2019: 0.04181463130976304
  bluex_acc_stderr,exam_id__USP_2020: 0.03539764734011866
  bluex_acc_stderr,exam_id__USP_2021: 0.03859991685452418
  bluex_acc_stderr,exam_id__USP_2022: 0.0397939306907615
  bluex_acc_stderr,exam_id__USP_2023: 0.042529993265009204
  bluex_acc_stderr,exam_id__USP_2024: 0.04456715995150719
  bluex_alias: bluex
  calame_pt_acc: 0.09489402697495183
  calame_pt_acc_stderr: 0.006433689590297553
  calame_pt_alias: calame_pt
  calame_pt_perplexity: 8435.632130447184
  calame_pt_perplexity_stderr: 963.9665411064852
  enem_challenge_acc,all: 0.3198040587823653
  enem_challenge_acc,exam_id__2009: 0.3130434782608696
  enem_challenge_acc,exam_id__2010: 0.38461538461538464
  enem_challenge_acc,exam_id__2011: 0.29914529914529914
  enem_challenge_acc,exam_id__2012: 0.31896551724137934
  enem_challenge_acc,exam_id__2013: 0.37037037037037035
  enem_challenge_acc,exam_id__2014: 0.3394495412844037
  enem_challenge_acc,exam_id__2015: 0.3025210084033613
  enem_challenge_acc,exam_id__2016: 0.3140495867768595
  enem_challenge_acc,exam_id__2016_2: 0.2764227642276423
  enem_challenge_acc,exam_id__2017: 0.25
  enem_challenge_acc,exam_id__2022: 0.3233082706766917
  enem_challenge_acc,exam_id__2023: 0.34814814814814815
  enem_challenge_acc_stderr,all: 0.007148951828535609
  enem_challenge_acc_stderr,exam_id__2009: 0.024960208775102345
  enem_challenge_acc_stderr,exam_id__2010: 0.025924496341894532
  enem_challenge_acc_stderr,exam_id__2011: 0.02449152244622838
  enem_challenge_acc_stderr,exam_id__2012: 0.024889831032253663
  enem_challenge_acc_stderr,exam_id__2013: 0.02682596737556379
  enem_challenge_acc_stderr,exam_id__2014: 0.026210250439613336
  enem_challenge_acc_stderr,exam_id__2015: 0.02434000254473757
  enem_challenge_acc_stderr,exam_id__2016: 0.024414986986171384
  enem_challenge_acc_stderr,exam_id__2016_2: 0.023342214704782527
  enem_challenge_acc_stderr,exam_id__2017: 0.02314953920904528
  enem_challenge_acc_stderr,exam_id__2022: 0.023367019181975722
  enem_challenge_acc_stderr,exam_id__2023: 0.023749430869741607
  enem_challenge_alias: enem
  faquad_nli_acc,all: 0.7846153846153846
  faquad_nli_acc_stderr,all: 0.011396120309131366
  faquad_nli_alias: faquad_nli
  faquad_nli_f1_macro,all: 0.4396551724137931
  faquad_nli_f1_macro_stderr,all: 0.00357969847290883
  global_piqa_completions_por_latn_braz_acc: 0.7
  global_piqa_completions_por_latn_braz_acc_bytes: 0.69
  global_piqa_completions_por_latn_braz_acc_bytes_stderr: 0.046482319871173176
  global_piqa_completions_por_latn_braz_acc_norm: 0.68
  global_piqa_completions_por_latn_braz_acc_norm_stderr: 0.046882617226215076
  global_piqa_completions_por_latn_braz_acc_stderr: 0.04605661864718383
  global_piqa_completions_por_latn_braz_alias: global_piqa_completions_por_latn_braz
  gsm8k_pt_alias: gsm8k_pt
  gsm8k_pt_exact_match,flexible-extract: 0.1461187214611872
  gsm8k_pt_exact_match,strict-match: 0.0
  gsm8k_pt_exact_match_stderr,flexible-extract: 0.009748085159664667
  gsm8k_pt_exact_match_stderr,strict-match: 0.0
  hatebr_offensive_acc,all: 0.5328571428571428
  hatebr_offensive_acc_stderr,all: 0.009452579861644791
  hatebr_offensive_alias: hatebr_offensive_binary
  hatebr_offensive_f1_macro,all: 0.4054916057180851
  hatebr_offensive_f1_macro_stderr,all: 0.007877082274553134
  hellaswag_poly_pt_acc: 0.3781558131975295
  hellaswag_poly_pt_acc_norm: 0.47209881894029687
  hellaswag_poly_pt_acc_norm_stderr: 0.005196835630828483
  hellaswag_poly_pt_acc_stderr: 0.005048035343611143
  hellaswag_poly_pt_alias: hellaswag_poly_pt
  humaneval_instruct_alias: humaneval_instruct
  humaneval_instruct_pass@1,create_test: 0.0
  humaneval_instruct_pass@1_stderr,create_test: 0.0
  ifeval_pt_alias: ifeval_pt
  ifeval_pt_inst_level_loose_acc: 0.3930232558139535
  ifeval_pt_inst_level_loose_acc_stderr: N/A
  ifeval_pt_inst_level_strict_acc: 0.3302325581395349
  ifeval_pt_inst_level_strict_acc_stderr: N/A
  ifeval_pt_prompt_level_loose_acc: 0.27666666666666667
  ifeval_pt_prompt_level_loose_acc_stderr: 0.025870931391123536
  ifeval_pt_prompt_level_strict_acc: 0.21
  ifeval_pt_prompt_level_strict_acc_stderr: 0.023555243542102446
  lambada_poly_pt_acc: 0.20861633999611875
  lambada_poly_pt_acc_stderr: 0.005660825573438258
  lambada_poly_pt_alias: lambada_poly_pt
  lambada_poly_pt_perplexity: 861.6919525698232
  lambada_poly_pt_perplexity_stderr: 63.082803555905926
  mmlu_poly_pt_acc: 0.3607775442809967
  mmlu_poly_pt_acc_stderr: 0.004160492531275285
  mmlu_poly_pt_alias: mmlu_poly_pt
  oab_exams_acc,all: 0.27015945330296126
  oab_exams_acc,exam_id__2010-01: 0.24705882352941178
  oab_exams_acc,exam_id__2010-02: 0.28
  oab_exams_acc,exam_id__2011-03: 0.24242424242424243
  oab_exams_acc,exam_id__2011-04: 0.225
  oab_exams_acc,exam_id__2011-05: 0.325
  oab_exams_acc,exam_id__2012-06: 0.3
  oab_exams_acc,exam_id__2012-06a: 0.2125
  oab_exams_acc,exam_id__2012-07: 0.3125
  oab_exams_acc,exam_id__2012-08: 0.1375
  oab_exams_acc,exam_id__2012-09: 0.22077922077922077
  oab_exams_acc,exam_id__2013-10: 0.2375
  oab_exams_acc,exam_id__2013-11: 0.275
  oab_exams_acc,exam_id__2013-12: 0.2375
  oab_exams_acc,exam_id__2014-13: 0.2875
  oab_exams_acc,exam_id__2014-14: 0.25
  oab_exams_acc,exam_id__2014-15: 0.21794871794871795
  oab_exams_acc,exam_id__2015-16: 0.2875
  oab_exams_acc,exam_id__2015-17: 0.3333333333333333
  oab_exams_acc,exam_id__2015-18: 0.275
  oab_exams_acc,exam_id__2016-19: 0.2692307692307692
  oab_exams_acc,exam_id__2016-20: 0.3625
  oab_exams_acc,exam_id__2016-20a: 0.275
  oab_exams_acc,exam_id__2016-21: 0.3
  oab_exams_acc,exam_id__2017-22: 0.3375
  oab_exams_acc,exam_id__2017-23: 0.3
  oab_exams_acc,exam_id__2017-24: 0.3
  oab_exams_acc,exam_id__2018-25: 0.25
  oab_exams_acc_stderr,all: 0.0054708493847973475
  oab_exams_acc_stderr,exam_id__2010-01: 0.027072546734021444
  oab_exams_acc_stderr,exam_id__2010-02: 0.025924135072639538
  oab_exams_acc_stderr,exam_id__2011-03: 0.024845779837305654
  oab_exams_acc_stderr,exam_id__2011-04: 0.026955276691079964
  oab_exams_acc_stderr,exam_id__2011-05: 0.03024661215761147
  oab_exams_acc_stderr,exam_id__2012-06: 0.02951246277176116
  oab_exams_acc_stderr,exam_id__2012-06a: 0.026389734380509827
  oab_exams_acc_stderr,exam_id__2012-07: 0.029841237684623958
  oab_exams_acc_stderr,exam_id__2012-08: 0.0221709705988969
  oab_exams_acc_stderr,exam_id__2012-09: 0.027302999732294067
  oab_exams_acc_stderr,exam_id__2013-10: 0.0274908309628061
  oab_exams_acc_stderr,exam_id__2013-11: 0.02882257223036154
  oab_exams_acc_stderr,exam_id__2013-12: 0.027413917931195794
  oab_exams_acc_stderr,exam_id__2014-13: 0.02917450516650155
  oab_exams_acc_stderr,exam_id__2014-14: 0.027979401755101777
  oab_exams_acc_stderr,exam_id__2014-15: 0.027011736935613068
  oab_exams_acc_stderr,exam_id__2015-16: 0.029203723510899393
  oab_exams_acc_stderr,exam_id__2015-17: 0.030881077024925714
  oab_exams_acc_stderr,exam_id__2015-18: 0.0288121979710083
  oab_exams_acc_stderr,exam_id__2016-19: 0.029017747344098694
  oab_exams_acc_stderr,exam_id__2016-20: 0.030947316988827907
  oab_exams_acc_stderr,exam_id__2016-20a: 0.02885616149691637
  oab_exams_acc_stderr,exam_id__2016-21: 0.029466001599879478
  oab_exams_acc_stderr,exam_id__2017-22: 0.0304433823531397
  oab_exams_acc_stderr,exam_id__2017-23: 0.0295029507416334
  oab_exams_acc_stderr,exam_id__2017-24: 0.029614631804279756
  oab_exams_acc_stderr,exam_id__2018-25: 0.028064299041816978
  oab_exams_alias: oab_exams
  portuguese_hate_speech_acc,all: 0.30552291421856637
  portuguese_hate_speech_acc_stderr,all: 0.0111342367647186
  portuguese_hate_speech_alias: portuguese_hate_speech_binary
  portuguese_hate_speech_f1_macro,all: 0.24331655796529078
  portuguese_hate_speech_f1_macro_stderr,all: 0.007523479961403228
  tweetsentbr_acc,all: 0.34626865671641793
  tweetsentbr_acc_stderr,all: 0.007507813204647027
  tweetsentbr_alias: tweetsentbr
  tweetsentbr_f1_macro,all: 0.24098176805099736
  tweetsentbr_f1_macro_stderr,all: 0.005874676993024155
 step: 3595
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,13 @@
 {
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": [
    2
  ],
  "max_new_tokens": 1024,
  "pad_token_id": 49109,
  "renormalize_logits": true,
  "repetition_penalty": 1.2,
  "temperature": 0.1,
  "transformers_version": "4.57.3"
 }
--- a/logo.png
+++ b/logo.png
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:1856d91c3b35390cee5122902d94044657c67df7034ca4005316275c404fc8a0
 size 197189
--- a/model.safetensors
+++ b/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:e83956aec017fc069d60714d38ee9595ad35e9d242c722c865861340bbf1ebf8
 size 981633328
--- a/ruler.yaml
+++ b/ruler.yaml
@@ -0,0 +1,82 @@
 model_name: Tucano2-qwen-0.5B-Think
 results:
  niah_pt_multikey_1_1024: 0.706
  niah_pt_multikey_1_1024_stderr: 0.02039509548493655
  niah_pt_multikey_1_2048: 0.67
  niah_pt_multikey_1_2048_stderr: 0.021049612166134782
  niah_pt_multikey_1_4096: 0.542
  niah_pt_multikey_1_4096_stderr: N/A
  niah_pt_multikey_1_alias: " - niah_pt_multikey_1"
  niah_pt_multikey_2_1024: 0.48
  niah_pt_multikey_2_1024_stderr: 0.022365160424231326
  niah_pt_multikey_2_2048: 0.238
  niah_pt_multikey_2_2048_stderr: 0.019064072958198387
  niah_pt_multikey_2_4096: 0.062
  niah_pt_multikey_2_4096_stderr: N/A
  niah_pt_multikey_2_alias: " - niah_pt_multikey_2"
  niah_pt_multikey_3_1024: 0.486
  niah_pt_multikey_3_1024_stderr: 0.022374298166353144
  niah_pt_multikey_3_2048: 0.31
  niah_pt_multikey_3_2048_stderr: 0.020704041021724684
  niah_pt_multikey_3_4096: 0.184
  niah_pt_multikey_3_4096_stderr: N/A
  niah_pt_multikey_3_alias: " - niah_pt_multikey_3"
  niah_pt_multiquery_1024: 0.531
  niah_pt_multiquery_1024_stderr: 0.013691344193015646
  niah_pt_multiquery_2048: 0.4915
  niah_pt_multiquery_2048_stderr: 0.014053487147395266
  niah_pt_multiquery_4096: 0.4215
  niah_pt_multiquery_4096_stderr: N/A
  niah_pt_multiquery_alias: " - niah_pt_multiquery"
  niah_pt_multivalue_1024: 0.4995
  niah_pt_multivalue_1024_stderr: 0.013476376569794338
  niah_pt_multivalue_2048: 0.519
  niah_pt_multivalue_2048_stderr: 0.013327913059930505
  niah_pt_multivalue_4096: 0.4545
  niah_pt_multivalue_4096_stderr: N/A
  niah_pt_multivalue_alias: " - niah_pt_multivalue"
  niah_pt_single_1_1024: 0.82
  niah_pt_single_1_1024_stderr: 0.017198592476314233
  niah_pt_single_1_2048: 0.816
  niah_pt_single_1_2048_stderr: 0.017346174781752842
  niah_pt_single_1_4096: 0.8
  niah_pt_single_1_4096_stderr: N/A
  niah_pt_single_1_alias: " - niah_pt_single_1"
  niah_pt_single_2_1024: 0.778
  niah_pt_single_2_1024_stderr: 0.018604414758250098
  niah_pt_single_2_2048: 0.772
  niah_pt_single_2_2048_stderr: 0.018781306529363172
  niah_pt_single_2_4096: 0.688
  niah_pt_single_2_4096_stderr: N/A
  niah_pt_single_2_alias: " - niah_pt_single_2"
  niah_pt_single_3_1024: 0.468
  niah_pt_single_3_1024_stderr: 0.022337186479044296
  niah_pt_single_3_2048: 0.508
  niah_pt_single_3_2048_stderr: 0.022380208834928014
  niah_pt_single_3_4096: 0.5
  niah_pt_single_3_4096_stderr: N/A
  niah_pt_single_3_alias: " - niah_pt_single_3"
  ruler_pt_4096: 0.44008484848484847
  ruler_pt_4096_stderr: N/A
  ruler_pt_alias: ruler_pt
  ruler_pt_cwe_1024: 0.2516
  ruler_pt_cwe_1024_stderr: 0.0065879953982022075
  ruler_pt_cwe_2048: 0.10560000000000001
  ruler_pt_cwe_2048_stderr: 0.0046626989526502875
  ruler_pt_cwe_4096: 0.268
  ruler_pt_cwe_4096_stderr: N/A
  ruler_pt_cwe_alias: " - ruler_pt_cwe"
  ruler_pt_fwe_1024: 0.7766666666666666
  ruler_pt_fwe_1024_stderr: 0.010771818051204566
  ruler_pt_fwe_2048: 0.644
  ruler_pt_fwe_2048_stderr: 0.010544896116732008
  ruler_pt_fwe_4096: 0.5413333333333332
  ruler_pt_fwe_4096_stderr: N/A
  ruler_pt_fwe_alias: " - ruler_pt_fwe"
  ruler_pt_vt_1024: 0.8336
  ruler_pt_vt_1024_stderr: 0.01194434656352784
  ruler_pt_vt_2048: 0.4344
  ruler_pt_vt_2048_stderr: 0.014185758756689964
  ruler_pt_vt_4096: 0.37960000000000005
  ruler_pt_vt_4096_stderr: N/A
  ruler_pt_vt_alias: " - ruler_pt_vt"
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,30 @@
 {
  "bos_token": {
    "content": "<|im_start|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|im_end|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|pad|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<|unk|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/tokenizer.json
+++ b/tokenizer.json
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,397 @@
 {
  "add_bos_token": false,
  "add_eos_token": false,
  "add_prefix_space": null,
  "added_tokens_decoder": {
    "0": {
      "content": "<|unk|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49109": {
      "content": "<|pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "49110": {
      "content": "<tools>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49111": {
      "content": "</tools>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49112": {
      "content": "<tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49113": {
      "content": "</tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49114": {
      "content": "<tool_response>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49115": {
      "content": "</tool_response>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49116": {
      "content": "<think>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49117": {
      "content": "</think>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49118": {
      "content": "<answer>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49119": {
      "content": "</answer>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49120": {
      "content": "<context>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49121": {
      "content": "</context>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49122": {
      "content": "<|fim_prefix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49123": {
      "content": "<|fim_suffix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49124": {
      "content": "<|fim_middle|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49125": {
      "content": "<|repo_name|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49126": {
      "content": "<|image|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49127": {
      "content": "<|image_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49128": {
      "content": "<|image_placeholder|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49129": {
      "content": "                        ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49130": {
      "content": "                       ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49131": {
      "content": "                      ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49132": {
      "content": "                     ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49133": {
      "content": "                    ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49134": {
      "content": "                   ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49135": {
      "content": "                  ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49136": {
      "content": "                 ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49137": {
      "content": "                ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49138": {
      "content": "               ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49139": {
      "content": "              ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49140": {
      "content": "             ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49141": {
      "content": "            ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49142": {
      "content": "           ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49143": {
      "content": "          ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49144": {
      "content": "         ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49145": {
      "content": "        ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49146": {
      "content": "       ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49147": {
      "content": "      ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49148": {
      "content": "     ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49149": {
      "content": "    ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49150": {
      "content": "   ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "49151": {
      "content": "  ",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    }
  },
  "bos_token": "<|im_start|>",
  "bos_token_id": 1,
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|im_end|>",
  "eos_token_id": 2,
  "extra_special_tokens": {},
  "legacy": false,
  "model_input_names": [
    "input_ids",
    "attention_mask"
  ],
  "model_max_length": 4096,
  "pad_token": "<|pad|>",
  "pad_token_id": 49109,
  "padding_side": "right",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "PreTrainedTokenizerFast",
  "truncation_side": "right",
  "unk_token": "<|unk|>",
  "unk_token_id": 0,
  "use_default_system_prompt": false
 }
--- a/train_logs_apo.parquet
+++ b/train_logs_apo.parquet
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:33c92a3850b512684a41eff4d9c336713270ed3158857d848cdc2db4df483f70
 size 46849
--- a/train_logs_sft.parquet
+++ b/train_logs_sft.parquet
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:c9f244d76ed1cabcccae0ac9461aa0c5313464cda19aaf67603a665a05d7d677
 size 67160
--- a/training_config_apo.yaml
+++ b/training_config_apo.yaml
@@ -0,0 +1,98 @@
 # Configuration for the APO stage (loss_type "apo_zero", see "APO settings").
 # The tokenizer and the reference model both point at the
 # Tucano2-qwen-0.5B-Think-SFT checkpoint (see "Tokenizer and Reference model").
 # Directory settings
 checkpoint_dir: "/polyglot/portuguese/checkpoints/models/Tucano2-qwen-0.5B-Think"
 train_dataset_dir:
  # Total: 13,649 samples (x5 epochs)
  # Harmful samples (with reasoning): 4,008 samples
  - /polyglot/portuguese/gigaverbo-v2-dpo/harmfull-reasoning
  # Harmless samples (with reasoning): 9,641 samples
  - /polyglot/portuguese/gigaverbo-v2-dpo/harmless-reasoning
 val_dataset_dir: null
 dataset_type: "jsonl"
 cache_dir: "/lustre/mlnvme/data/polyglot/.cache"
 # Data loading settings
 pin_memory: true
 num_workers_for_dataloader: 16
 shuffle_dataset: true
 mask_eos_token: false
 mask_pad_token: false
 # Model architecture settings
 vocab_size: 49152
 num_hidden_layers: 28
 num_attention_heads: 16
 num_key_value_heads: 8
 head_dim: 128
 hidden_size: 1024
 intermediate_size: 3072
 max_position_embeddings: 4096
 tie_word_embeddings: true
 hidden_act: "silu"
 output_hidden_states: false
 attn_implementation: "flash_attention_2"
 use_cache: false
 no_rope_layer_interval: null
 rope_theta: 1000000.0
 rope_scale_factor: null
 rms_norm_eps: 0.000001
 # Training settings
 # NOTE(review): 524288 = 128 x 4096 (max_position_embeddings); presumably
 # total_batch_size is counted in tokens - confirm against the trainer.
 total_batch_size: 524288
 micro_batch_size: 4
 gradient_accumulation_steps: 4
 eval_micro_batch_size: null
 num_train_epochs: 5
 warmup_ratio: 0.1
 max_learning_rate: 0.000005
 min_learning_rate: 0.0
 muon_learning_rate: null
 weight_decay: 0.0
 beta1: 0.9
 beta2: 0.95
 eps: 0.00000001
 lr_decay_type: "cosine"
 use_sqrt: false
 # `1.` is a float literal with a trailing dot, i.e. 1.0.
 lr_decay_iters_coef: 1.
 seed: 42
 # NOTE(review): 13,649 samples x 5 epochs = 68,245; 68,245 / 535 is about
 # 127.6 samples per step - presumably the effective batch size in
 # sequences; confirm.
 max_steps: 535
 max_grad_norm: 1.0
 # APO settings
 loss_type: "apo_zero"
 dpo_beta: 0.5
 precompute_ref_log_probs: true
 truncation_mode: "keep_end"
 # Precision and optimization settings
 torch_compile: false
 # NOTE(review): "highest" matmul precision together with `tf32: true` looks
 # contradictory if these map to torch.set_float32_matmul_precision and the
 # allow_tf32 backend flags - confirm which one the trainer applies last.
 mat_mul_precision: "highest"
 tf32: true
 bf16: true
 gradient_checkpointing: false
 use_liger_kernel: false
 static_graph: false
 # Hub settings
 push_to_hub: false
 hub_token: null
 hub_model_id: null
 # Tokenizer and Reference model
 tokenizer_name_or_path: "/polyglot/portuguese/checkpoints/models/Tucano2-qwen-0.5B-Think-SFT"
 chat_template_path: null
 reference_model: "/polyglot/portuguese/checkpoints/models/Tucano2-qwen-0.5B-Think-SFT"
 continual_pretraining: true
 # Checkpoint settings
 resume_from_checkpoint: null
 # NOTE(review): checkpointing_steps (1000) is larger than max_steps (535), so
 # a periodic checkpoint would never trigger during this run unless the
 # trainer also saves at the end of training - confirm.
 checkpointing_steps: 1000
 begin_new_stage: true
 stage_name: "single_cosine"
 # Miscellaneous settings
 sanity_check: false
 sanity_check_num_samples: 100000
 wandb_token: null
 wandb_id: "tucano2-qwen-0.5b-think-apo"
 wandb_project: "Polyglot"
 wandb_desc: "Developing LLMs for low-resource languages"
--- a/training_config_sft.yaml
+++ b/training_config_sft.yaml
@@ -0,0 +1,93 @@
 # Configuration for the SFT stage (see "SFT settings"). The tokenizer and
 # `reference_model` both point at Polygl0t/Tucano2-qwen-0.5B-Base, and
 # checkpoint_dir is the Tucano2-qwen-0.5B-Think-SFT directory.
 # Directory settings
 checkpoint_dir: "/polyglot/portuguese/checkpoints/models/Tucano2-qwen-0.5B-Think-SFT"
 train_dataset_dir:
  # Reasoning: ~34 million tokens (x5 epochs)
  - /polyglot/portuguese/gigaverbo-v2-sft/reasoning
 val_dataset_dir: null
 dataset_type: "jsonl"
 cache_dir: "/lustre/mlnvme/data/polyglot/.cache"
 # Data loading settings
 pin_memory: true
 num_workers_for_dataloader: 16
 shuffle_dataset: true
 mask_eos_token: false
 mask_pad_token: true
 # Model architecture settings
 vocab_size: 49152
 num_hidden_layers: 28
 num_attention_heads: 16
 num_key_value_heads: 8
 head_dim: 128
 hidden_size: 1024
 intermediate_size: 3072
 max_position_embeddings: 4096
 tie_word_embeddings: true
 hidden_act: "silu"
 output_hidden_states: false
 attn_implementation: "flash_attention_2"
 use_cache: false
 no_rope_layer_interval: null
 rope_theta: 1000000.0
 rope_scale_factor: null
 rms_norm_eps: 0.000001
 # Training settings
 # NOTE(review): 524288 = 128 x 4096 (max_position_embeddings); presumably
 # total_batch_size is counted in tokens - confirm against the trainer.
 total_batch_size: 524288
 micro_batch_size: 4
 gradient_accumulation_steps: 4
 eval_micro_batch_size: null
 num_train_epochs: 5
 warmup_ratio: 0.1
 max_learning_rate: 0.000085
 min_learning_rate: 0.0
 muon_learning_rate: null
 weight_decay: 0.0
 beta1: 0.9
 beta2: 0.95
 eps: 0.00000001
 lr_decay_type: "cosine"
 use_sqrt: false
 # `1.` is a float literal with a trailing dot, i.e. 1.0.
 lr_decay_iters_coef: 1.
 seed: 42
 max_steps: 3060
 max_grad_norm: 1.0
 # SFT settings
 packing: false
 assistant_only_loss: true
 # Precision and optimization settings
 torch_compile: false
 # NOTE(review): "highest" matmul precision together with `tf32: true` looks
 # contradictory if these map to torch.set_float32_matmul_precision and the
 # allow_tf32 backend flags - confirm which one the trainer applies last.
 mat_mul_precision: "highest"
 tf32: true
 bf16: true
 gradient_checkpointing: false
 use_liger_kernel: true
 static_graph: false
 # Hub settings
 push_to_hub: false
 hub_token: null
 hub_model_id: null
 # Tokenizer and Reference model
 tokenizer_name_or_path: "Polygl0t/Tucano2-qwen-0.5B-Base"
 chat_template_path: null
 # NOTE(review): in this SFT config `reference_model` presumably names the
 # base checkpoint that training starts from - confirm against the trainer.
 reference_model: "Polygl0t/Tucano2-qwen-0.5B-Base"
 continual_pretraining: true
 # Checkpoint settings
 resume_from_checkpoint: null
 checkpointing_steps: 1000
 begin_new_stage: true
 stage_name: "single_cosine"
 # Miscellaneous settings
 sanity_check: false
 sanity_check_num_samples: 100000
 wandb_token: null
 wandb_id: "tucano2-qwen-0.5b-think-sft"
 wandb_project: "Polyglot"
 wandb_desc: "Developing LLMs for low-resource languages"