初始化项目,由ModelHub XC社区提供模型

Model: Polygl0t/Tucano2-qwen-0.5B-Think
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-04 14:20:49 +08:00
commit d2e30c7b41
22 changed files with 465611 additions and 0 deletions

41
.gitattributes vendored Normal file
View File

@@ -0,0 +1,41 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
logo.png filter=lfs diff=lfs merge=lfs -text
.plots/apo_gradient_norm.png filter=lfs diff=lfs merge=lfs -text
.plots/apo_reward.png filter=lfs diff=lfs merge=lfs -text
.plots/model_comparison.png filter=lfs diff=lfs merge=lfs -text
.plots/sft_gradient_norm.png filter=lfs diff=lfs merge=lfs -text
.plots/sft_loss.png filter=lfs diff=lfs merge=lfs -text

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0729b0ba79b79c3f9ae2becdfc986f9b9e7b6864e845292aaf78dc77e4535d93
size 543980

3
.plots/apo_reward.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a7fa6778f74ca9cf434eda73696e2cbaa1144d42a6e61827364881ff55c36065
size 286838

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:caa80f85de1854e5ae25089c2bd1ab0c3991c59e14d5ee290d78710fb42a5cb4
size 222035

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:54f1bfe1d67832bff0989e695765bfb444633bb504aa4602c240e4a00763f3d2
size 335390

3
.plots/sft_loss.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:fb8c563e02344e1a42c0d66771ec2e6c07445b47470565ec3812aff525570d40
size 387159

190
LICENSE Normal file
View File

@@ -0,0 +1,190 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright Nicholas Kluge Corrêa, Shiza Fatimah, Aniket Sen, and Sophia Falk
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

547
README.md Normal file
View File

@@ -0,0 +1,547 @@
---
language:
- pt
license: apache-2.0
library_name: transformers
tags:
- text-generation-inference
datasets:
- Polygl0t/gigaverbo-v2-sft
- Polygl0t/gigaverbo-v2-preferences
metrics:
- perplexity
pipeline_tag: text-generation
widget:
- text: "<|im_start|>user\nQual é a capital de Portugal?<|im_end|><|im_start|>assistant\n"
example_title: Exemplo
- text: "<|im_start|>user\nEscreva um poema sobre a floresta amazônica.<|im_end|><|im_start|>assistant\n"
example_title: Exemplo
- text: "<|im_start|>user\nListe três benefícios da energia solar.<|im_end|><|im_start|>assistant\n"
example_title: Exemplo
inference:
parameters:
repetition_penalty: 1.2
temperature: 0.1
top_k: 50
top_p: 1.0
max_new_tokens: 150
co2_eq_emissions:
emissions: 1230
source: CodeCarbon
training_type: post-training
geographical_location: Germany
hardware_used: NVIDIA A100-SXM4-80GB
model-index:
- name: Tucano2-qwen-0.5B-Think
results:
- task:
type: text-generation
name: Text Generation
dataset:
name: ARC Challenge
type: Polygl0t/ARC-poly
split: test
args:
num_few_shot: 5
metrics:
- type: acc_norm
value: 32.74
name: Acc-norm
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: arc_challenge_poly_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: MMLU
type: Polygl0t/MMLU-poly
split: test
args:
num_few_shot: 5
metrics:
- type: acc
value: 36.08
name: Acc
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: mmlu_poly_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: BELEBELE
type: facebook/belebele
split: test
args:
num_few_shot: 5
metrics:
- type: acc_norm
value: 36.11
name: Acc-norm
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: belebele_por_Latn
- task:
type: text-generation
name: Text Generation
dataset:
name: BLUEX
type: eduagarcia-temp/BLUEX_without_images
split: train
args:
num_few_shot: 3
metrics:
- type: acc
value: 34.49
name: Acc
source:
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
name: bluex
- task:
type: text-generation
name: Text Generation
dataset:
name: ENEM Challenge
type: eduagarcia/enem_challenge
split: train
args:
num_few_shot: 3
metrics:
- type: acc
value: 31.98
name: Acc
source:
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
name: enem_challenge
- task:
type: text-generation
name: Text Generation
dataset:
name: OAB Exams
type: eduagarcia/oab_exams
split: train
args:
num_few_shot: 3
metrics:
- type: acc
value: 27.02
name: Acc
source:
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
name: oab_exams
- task:
type: text-generation
name: Text Generation
dataset:
name: IFEval
type: Polygl0t/IFEval-PT
split: train
args:
num_few_shot: 0
metrics:
- type: ifeval_pt_prompt_level_loose_acc
value: 27.67
name: Acc-loose
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: ifeval_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: GSM8K
type: Polygl0t/gsm8k-pt
split: test
args:
num_few_shot: 0
metrics:
- type: flexible-extract
value: 14.61
name: Acc-flex
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: gsm8k_pt
base_model: Polygl0t/Tucano2-qwen-0.5B-Base
---
# Tucano2-qwen-0.5B-Think
<img src="./logo.png" alt="An illustration of a Tucano bird showing vibrant colors like yellow, orange, blue, green, and black." height="200">
## Model Summary
**[Tucano2-qwen-0.5B-Think](https://huggingface.co/Polygl0t/Tucano2-qwen-0.5B-Think)** is an instruction-tuned Portuguese language model built on top of **Tucano2-qwen-0.5B-Base**. It has been trained using a combination of one round of supervised fine-tuning (SFT) and one round of Anchored Preference Optimization (APO).
Tucano2-qwen-0.5B-Think is a reasoning model, which means it has been fine-tuned to generate CoT-style (Chain-of-Thought) traces in its responses. These reasoning traces are always encapsulated within the special tokens `<think>` and `</think>`.
**All datasets, source code, and training recipes used to develop the Tucano2 series are fully open and reproducible.**
## Details
- **Architecture:** a Transformer-based model ([`qwen3`](https://huggingface.co/docs/transformers/main/en/model_doc/qwen3))
- **Size:** 490,799,104 parameters
- **Context length:** 4,096 tokens
- **Dataset(s):**
- [Polygl0t/gigaverbo-v2-sft](https://huggingface.co/datasets/Polygl0t/gigaverbo-v2-sft)
- [Polygl0t/gigaverbo-v2-preferences](https://huggingface.co/datasets/Polygl0t/gigaverbo-v2-preferences)
- **Training time**: ~ 1.5 hours
- **Emissions:** 1.23 kg CO2eq (Germany)
- **Total energy consumption:** 2.66 kWh
This repository has the [source code](https://github.com/Polygl0t/llm-foundry) used to train this model. The full configuration used for training is available in the following config files:
- Single stage Supervised Fine-Tuning (linear warmup with cosine decay): [training_config_sft.yaml](training_config_sft.yaml)
- Single stage Anchored Preference Optimization (linear warmup with cosine decay): [training_config_apo.yaml](training_config_apo.yaml)
- Training Logs (loss, lr, rewards, etc.): [train_logs_apo.parquet](train_logs_apo.parquet), [train_logs_sft.parquet](train_logs_sft.parquet)
<details>
<summary><b>SFT Loss Curve</b></summary>
![SFT Loss Curve](./.plots/sft_loss.png)
</details>
<details>
<summary><b>APO Rewards</b></summary>
![APO Rewards](./.plots/apo_reward.png)
</details>
## Intended Uses
The primary intended use of Tucano2-qwen-0.5B-Think is to serve as a foundation for research and development involving Portuguese language modeling. You may also fine-tune and adapt Tucano2-qwen-0.5B-Think for deployment if your use follows the Apache 2.0 license. If you decide to use Tucano2-qwen-0.5B-Think as a basis for your fine-tuned model, please conduct your own risk and bias assessment.
## Basic usage
```python
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig
import torch
# Load model and tokenizer
model_id = "Polygl0t/Tucano2-qwen-0.5B-Think"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto"
)

# Configure generation parameters (sampling at low temperature with a
# repetition penalty; the EOS token id doubles as the padding id).
# NOTE(review): max_new_tokens=150 may stop generation before the closing
# </think> tag is emitted -- raise it if answers come back empty.
generation_config = GenerationConfig(
    do_sample=True,
    temperature=0.1,
    top_k=50,
    top_p=1.0,
    repetition_penalty=1.2,
    max_new_tokens=150,
    pad_token_id=tokenizer.eos_token_id,
)

# Prepare chat messages
messages = [
    {"role": "user", "content": "Qual é a capital de Angola?"}
]

# Render the conversation with the tokenizer's chat template (as text, with
# the assistant generation prompt appended), then tokenize it and move the
# tensors to the device the model lives on.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Inference only: disable gradient tracking while generating.
with torch.no_grad():
    outputs = model.generate(**inputs, generation_config=generation_config)

# Decode only the newly generated tokens: slice off the prompt, whose length
# equals the number of input ids.
# NOTE(review): if the tokenizer registers <think>/</think> as *special*
# tokens, skip_special_tokens=True would strip them and the extraction below
# would never find a reasoning block -- confirm against the tokenizer config.
full_output = tokenizer.decode(
    outputs[0][len(inputs.input_ids[0]):],
    skip_special_tokens=True
).strip()

# Extract <think>...</think> content
open_tag = "<think>"
close_tag = "</think>"
think_content = None
final_response = full_output

open_idx = full_output.find(open_tag)
if open_idx != -1:
    start = open_idx + len(open_tag)
    # Look for the closing tag only *after* the opening tag so that a stray
    # "</think>" earlier in the text cannot produce an inverted slice.
    end = full_output.find(close_tag, start)
    if end != -1:
        think_content = full_output[start:end].strip()
        # Remove think block from final response
        final_response = (
            full_output[:open_idx] +
            full_output[end + len(close_tag):]
        ).strip()
    else:
        # The reasoning block was opened but never closed (e.g. generation
        # hit max_new_tokens): treat the remainder as reasoning instead of
        # presenting the raw trace as the answer.
        think_content = full_output[start:].strip()
        final_response = full_output[:open_idx].strip()

if think_content:
    print("🧠 Thinking:\n")
    print(think_content)
    print("\n" + "="*50 + "\n")

print("🤖 Answer:\n")
print(final_response)
```
## Limitations
Like almost all other language models trained on large text datasets scraped from the web, Tucano2-qwen-0.5B-Think shows behavior that does not make it an out-of-the-box solution for many real-world applications, especially those requiring factual, reliable, and nontoxic text generation. Tucano2-qwen-0.5B-Think is subject to the following:
- **Hallucinations:** Tucano2-qwen-0.5B-Think can produce content that can be mistaken for fact but is misleading or entirely false, i.e., hallucinations.
- **Biases and Toxicity:** Tucano2-qwen-0.5B-Think inherits the social and historical stereotypes from the data used to train it. Given these biases, the model can produce toxic content, i.e., harmful, offensive, or detrimental to individuals, groups, or communities.
- **Language Limitations:** Tucano2-qwen-0.5B-Think is primarily designed to interact with Portuguese. Other languages might challenge its comprehension, leading to potential misinterpretations or errors in response.
- **Repetition and Verbosity:** Tucano2-qwen-0.5B-Think may get stuck in repetition loops (especially if the repetition penalty during generation is set too low) or produce verbose responses unrelated to the prompt it was given.
Hence, even though Tucano2-qwen-0.5B-Think is released with a permissive license, we urge users to perform their own risk analysis on it if they intend to use it for real-world applications.
## Evaluations
The table below compares the Tucano2 (Think variant) series against other reasoning models of similar size. We divide our evaluations into two sets:
- **Knowledge & Reasoning:** ARC-Challenge, ENEM, BLUEX, OAB Exams, BELEBELE, MMLU, GSM8K-PT
- **Instruction Following:** IFEval-PT
The NPM (Normalized Performance Metric) provides a balanced view of model performance across tasks, accounting for each task's inherent difficulty by normalizing its evaluation score relative to its random baseline.
We do not include coding benchmarks in this table because the Think models were not trained on coding data during post-training and thus perform poorly on them. For coding skills, we recommend using the Instruct models instead, which were trained with coding data and perform much better on coding benchmarks.
| | Total Avg. | Knowledge & Reasoning (NPM) | Instruction Following |
| --------------------------- | ---------- | --------------------------- | --------------------- |
| **Tucano2-qwen-3.7B-Think** | 51.27 | 54.07 | 31.67 |
| SmolLM3-3B | 48.58 | 46.28 | 64.67 |
| Qwen3-4B | 46.35 | 40.97 | 84 |
| Qwen3-1.7B | 36.54 | 32 | 68.33 |
| **Tucano2-qwen-1.5B-Think** | 27.54 | 26.67 | 33.67 |
| Qwen3-0.6B | 24.11 | 19.22 | 58.33 |
| **Tucano2-qwen-0.5B-Think** | 14.41 | 12.52 | 27.67 |
<details>
<summary><b>Evaluation Suite</b></summary>
| **Benchmark** | **n-shot** | **Type** | **Baseline** | **Metric** |
| ------------------------- | ---------- | ------------- | ------------ | ------------------------ |
| **Knowledge & Reasoning** | | | | |
| ARC-Challenge | 5-shot | MC-Q&A | 25 | `acc_norm` |
| ENEM | 3-shot | MC-Q&A | 20 | `acc` |
| BLUEX | 3-shot | MC-Q&A | 22.5 | `acc` |
| OAB Exams | 3-shot | MC-Q&A | 25 | `acc` |
| BELEBELE | 5-shot | MC-Q&A | 25 | `acc_norm` |
| MMLU | 5-shot | MC-Q&A | 25 | `acc` |
| GSM8K-PT | 0-shot | Math Problems | 0 | `flexible-extract` |
| **Instruction Following** | | | | |
| IFEval-PT | 0-shot | Instruction | 0 | `prompt_level_loose_acc` |
</details>
<details>
<summary><b>Individual Benchmarks</b></summary>
| | BLUEX | ENEM | OAB | ARC Challenge | BELEBELE | MMLU | IFEval-PT | GSM8K-PT |
| --------------------------- | ----- | ----- | ----- | ------------- | -------- | ----- | --------- | -------- |
| **Tucano2-qwen-3.7B-Think** | 63 | 71.52 | 53.76 | 55.38 | 83.56 | 61.18 | 31.67 | 56.7 |
| SmolLM3-3B | 48.82 | 60.6 | 42.19 | 52.56 | 77.78 | 52.82 | 64.67 | 72.15 |
| Qwen3-4B | 78.3 | 85.23 | 47.97 | 39.66 | 23 | 31.53 | 84 | 77.02 |
| Qwen3-1.7B | 68.29 | 72.5 | 39.32 | 35.64 | 22.89 | 22.85 | 68.33 | 71.69 |
| **Tucano2-qwen-1.5B-Think** | 39.22 | 39.89 | 34.26 | 42.82 | 67.67 | 43.3 | 33.67 | 22.83 |
| Qwen3-0.6B | 43.53 | 48.71 | 34.21 | 29.91 | 22.89 | 22.85 | 58.33 | 58.37 |
| **Tucano2-qwen-0.5B-Think** | 34.49 | 31.98 | 27.02 | 32.74 | 36.11 | 36.08 | 27.67 | 14.61 |
</details>
<details>
<summary><b>Inference Samples on Various Tasks</b></summary>
### Cooking
**Prompt:**
<|im_start|>user
Você pode me dar uma receita simples de Bolo de Brigadeiro?<|im_end|>
<|im_start|>assistant
**Response:**
<think>
O usuário está me pedindo uma receita simples de bolo de brigadeiro. Preciso garantir que a resposta seja útil e fácil de seguir, então vou começar explicando brevemente o que é um bolo de brigadeiro para contextualizar sua solicitação. Agora, preciso listar os ingredientes necessários em quantidades moderadas para evitar sobrecarga na lista inicial. Vou considerar itens básicos como farinha, açúcar, chocolate amargo picado, leite condensor, essência de baunilha, canela moída, coco ralado e raspas de laranja. É importante mencionar as precauções iniciais ao lidar com alguns desses ingredientes, especialmente quando se trata do chocolate derretido durante o forno. Devo também sugerir algumas dicas adicionais para tornar a experiência ainda mais agradável, como misturar bem todos os ingredientes antes de colocar no forno e decorá-lo depois. Finalizo lembrando novamente que tudo deve estar pronto dentro das primeiras cinco minutos após assar.
</think>
Claro! Aqui vai uma receitinha básica de **Bolinho de Brigadeirão**:
### Ingredientes:
- 2 ovos grandes
- 1 xícara (chá) de açúcar mascavo ou mel (180g)
- 1/2 colher (sopa) de cacau em pó sem adoçante (30% ativo butylated buttercreme)
- 3/4 xícara (cha) de leite integral (250ml)
- 1/4 xícara (cha) de óleo vegetal neutro (20g)
- 3 colheres (sopa) de farinha de trigo fina (200g)
- Canela moida à gosto
- Coco ralado fresco ou seco (1/2 colher (sobremesa))
- Raspa(s) de limão siciliano (2 unidades)
### Modo de Preparo:
1. Em uma panela média, misture todos os ingredientes secos (farinha, cacaue, etc.) até formar uma massa homogênea. Reserve.
2. No liquidificador, bata juntos os ovos, o açúcar/mel, o leite, o óleo, a canela e o coco ralado. Adicione gradualmente o restante dos ingredientes secos enquanto continua batendo até obter uma mistura lisa e aerada.
3. Transfira a mistura para uma forma redonda untada e enfarinhada. Leve ao forno pré-aquecido a 180°C (aproximadamente 350°F). Asse por cerca de 25-30 min., virando metade do caminho para cozinhar uniformemente.
4. Retire do forno e deixe esfriar ligeiramente antes de cortar e servir. Decore com as raspas de limão caso goste!
Espero que você aproveite esse delicioso Bolinho de Brigadeirão! Qualquer dúvida adicional, estou aqui pra ajudar 😊.<|im_end|>
---
### Math
**Prompt:**
<|im_start|>user
Como eu posso resolver o seguinte problema: 2x + 3 = 11?<|im_end|>
<|im_start|>assistant
**Response:**
<think>
O usuário está me perguntando como resolver a equação linear 2x + 3 = 11. Preciso fornecer uma solução passo a passo para garantir que ele entenda cada etapa do raciocínio. Primeiro, vou identificar os elementos principais da equação: x é desconhecido e precisa ser isolado; ambos lados devem permanecer iguais (lei de conservação); há um termo constante no lado esquerdo (3). Agora preciso isolar o termo com 'x'. Para fazer isso, subtrairo 3 de ambos os lados porque estou fazendo a mesma operação em ambas as partes. Isso elimina o número 3, deixando apenas 'x' sozinho na esquerda. Deixe-me realizar esta subtração agora: 2x - 3 - 3 = 11 - 3. Simplificando, tenho 2x = 8. Com 'x' isolado, minha próxima tarefa será dividir ambos os lados por 2, já que estamos tentando encontrar o valor original de 'x', não sua derivada ou multiplicador. Realizando essa divisão, obtenho x = 4. Finalmente, formulo a resposta explicando ao usuário que 'x = 4' satisfaz a equação inicial 2x + 3 = 11, pois quando substituímos 4, obtemos 2*4 + 3 = 8, que simplifica para 8 + 3 = 11, confirmando assim a correção do cálculo.
</think>
Claro! Vamos resolver a equação \(2x + 3 = 11\) juntos. A ideia principal aqui é isolarmos a variável \(x\), geralmente movendo todos os outros termos para o outro lado da igualdade. Aqui estão os passos:
1. **Subtraia 3 de ambos os lados:**
- Começamos com a equação: \[2x + 3 = 11\]
- Subtraindo 3 de ambos os lados: \[2(x + 3) = 10\]
2. **Divida ambos os lados por 2:**
- Agora temos: \[(x + 3) = 5\]
- Dividindo ambos os lados por 2: \[x = 5 - 3\]
3. **Simplifique:**
- Chegamos à conclusão final: \[x = 2\]
Portanto, a solução para a equação \(2x + 3 = 11\) é \(x = 2\).
Se você tiver mais perguntas ou precisar de ajuda com outra equação, estou aqui para ajudar!<|im_end|>
---
### Rewriting
**Prompt:**
<|im_start|>user
Remova todas as informações irrelevantes da frase a seguir.
"O novo restaurante que abriu no centro da cidade, de propriedade do primo de Jorge, que costumava ser chef de um restaurante em Ipanema, serve uma variedade de cozinhas de todo o mundo."<|im_end|>
<|im_start|>assistant
**Response:**
<think>
O usuário está me pedindo para remover quaisquer informações irrelevantes ou desnecessárias da seguinte frase: 'O novo restaurante que abriu no centro da cidade, de propriedade do primo de Jorge, que costumava ser chef de um restaurante em Ipanema, serve uma variedade de cozinhas de todo o mundo.' Primeiro, vou ler cuidadosamente toda a estrutura gramatical e identificar os principais elementos da sentença. A frase contém vários componentes importantes: sujeito ('novo restaurante'), localização ('centro da cidade') e características específicas (propriedade do primo de Jorge). Agora preciso considerar quais partes podem ser removidas sem alterar significativamente o sentido geral da mensagem. Devo evitar mencionar pessoas ausentes, lugares específicos não utilizados além das indicações básicas, nem detalhes técnicos excessivos como nomes próprios ou termos complicados. Vou começar pela eliminação dos pronomes pessoais e adjetivos redundantemente repetidos. Posso simplificar 'que costumava' usando apenas 'costumava', mantendo assim a essência original. Também posso substituir 'chef' por algo mais genérico relacionado ao trabalho culinário, como 'cozinheiro'. Além disso, eliminar algumas palavras extras pode deixar a frase mais concisa. Finalmente, revisarei tudo novamente para garantir que nenhum elemento importante tenha sido omitido durante esse processo de limpeza. Após essas modificações, obtenho esta versão refinada da frase: 'Um novo estabelecimento gastronômico inaugurado na área central da cidade...serve diversos estilos de cozinha internacionalmente reconhecidos.' Essa reformulação preservou todos os aspectos relevantes enquanto reduziu consideravelmente seu comprimento inicial.
</think>
Claro! Aqui está a frase após a remoção das informações irrelevantes:
> Um novo estabelecimento gastronômico inaugurado na área central da cidade...serve diversos estilos de cozinha internacionalmente reconhecidos.
Notei alguns ajustes menores aqui:
- Substituí "primo de Jorge" pelo nome próprio equivalente ("Jorge") porque parece haver algum erro tipográfico nesse caso específico. No entanto, isso foi feito apenas para corrigir possíveis inconsistências; eu mantenho a grafia exata conforme apareceu originalmente nos seus dados.
- Ajustei ligeiramente outros termos para maior clareza, mas manteve-se fiel aos originais. Por exemplo, usei "estabelecimento gastronômico" em vez de "restaurante".<|im_end|>
---
### Science Explanation
**Prompt:**
<|im_start|>user
Explique a teoria da relatividade de forma simples (ELI5).<|im_end|>
<|im_start|>assistant
**Response:**
<think>
O usuário está me pedindo para explicar a Teoria da Relatividade em ELI5. Preciso garantir que minha resposta seja clara e concisa, mantendo o foco nos principais pontos. Primeiro, vou lembrar os elementos básicos dessa teoria: especial e geral. Agora, preciso estruturar meu raciocínio de maneira lógica e fácil de entender. Começo pela introdução do paradoxo dos gêmeos, pois ele ilustra bem as ideias centrais da teoria. Depois, passo às duas versões específicas da teoria — especial e geral —, destacando suas diferenças cruciais. Finalizo com exemplos práticos para ilustrar melhor cada ponto. Vou revisar rapidamente todos esses tópicos antes de formular a resposta completa.
</think>
Claro! A **Teoria da Relatividade** é um conjunto de princípios formulados por Albert Einstein no início do século XX. Ela divide-se basicamente entre dois aspectos principais:
1. **A Teoria Especial da Relatividade (1905)** - Esta parte trata das leis físicas invariantes sob transformações de Lorentz, ou seja, como essas leis funcionam independentemente do movimento relativo de observadores diferentes. Um exemplo famoso disso é o paradoxo dos gêmeos, onde um irmão viaja pelo espaço enquanto seu amigo fica parado aqui na Terra. No final, quem volta mais jovem porque correu menos tempo durante sua viagem espacial.
2. **A Generalização da Teoria Especial da Relatividade (1915)** - Aqui, Einstein expande ainda mais seus conceitos anteriores, introduzindo a ideia de gravidade através de campos tensoriais, não apenas força gravitacional convencional. Isso levou à formulação da Equação de Friedewald, embora ela tenha sido inicialmente proposta separadamente por Hendrik Lorentz.
Em termos simplificados, podemos dizer que a Teoria da Relatividade tenta unificar três grandes questões fundamentais sobre o universo:
- Como medimos distâncias e tempos dependendo do nosso estado de movimento?
- Existe uma relação universal entre massa e energia descrita pela famosa equação \( E = mc^2 \)?
- Gravidade realmente afeta tudo ao redor dela? Ou seria algo diferente?
Essas perguntas foram respondidas parcialmente pelas teorias modernas, mas continuam sendo objeto intenso de pesquisa científica até hoje. Espero que isso ajude você a entender alguns pontos importantes! Se tiver alguma outra dúvida específica, estou aqui pra ajudar.<|im_end|>
---
### History
**Prompt:**
<|im_start|>user
Liste os principais eventos envolvendo a Revolução Farroupilha.<|im_end|>
<|im_start|>assistant
**Response:**
<think>
O usuário está me perguntando para listar os principais eventos da Revolução Farroupilha, também conhecida como Guerra dos Dezos. Preciso garantir que minha resposta seja precisa e abrangente, cobrindo todos os momentos cruciais dessa importante fase da história do Rio Grande do Sul. Vou começar organizando meus pensamentos em categorias lógicas: início da guerra (1845), promulgação de leis (1844), batalhas significativas (1836-1845) e consequências pós-guerra (1883). Agora, vou estruturar cada categoria com exemplos específicos de eventos importantes dentro delas. Para iniciar, o evento inicial é fundamental: as tropas farroupilhas lideradas por Bento Gonçalves desembarcam na Laguna em 1849. Isso marca oficialmente o começo da revolta. Em seguida, vem a assinatura do Tratado de Ponche Verde em 1845, onde os rebeldes aceitam termos favoráveis aos interesses sulinos. Outro momento crucial são as Leis Provinciais nºs 1 e 2 de 1844, que estabelecem autonomia provincial e controle local sobre questões locais, respectivamente. A Batalha de Porongos continua sendo vital, pois mostra heroísmo militar e simboliza unidade entre diferentes facções. Após esse período turbulento, há decisões políticas críticas, especialmente aquelas relacionadas à formação das Províncias Unidas do Rio Grande do Sul e ao apoio imperial através do Decreto Imperial nº 5.246. As Guerras Civis subsequentes continuam até sua derrota definitiva em 1883. Finalmente, preciso mencionar brevemente algumas repercussões duradouras, incluindo influências culturais e econômicas no estado atual. Com esses elementos claros, posso agora redigir uma lista completa e coesa.
</think>
Claro! Aqui estão alguns dos principais eventos relacionados à Revolução Farroupilha (Guerra dos Debates):
1. **Início da Guerra (1845-1845)**:
- As forças farroupilhas iniciaram seu levante após um incidente naval perto de São Gabriel, RS. Inicialmente liderados pelo general David Canabarro, eles rapidamente ganharam apoio popular nas regiões litorâneas.
2. **Promulgações Legislativas (1844)**:
- Foi promulgada a Lei Provincial número 1, estabelecendo eleições livres nos municípios;
- Seguidamente, foi sancionada a Lei Provincial número 2, declarando todo território gaúcho livre de impostos provinciais.
3. **Batalhas Significativas (1836–1845)**:
- **Laguna**: Uma vitória decisiva das tropas imperiais sob comando do marechal Carlos Frederico Lenzi, marcando formalmente o fim imediato da rebelião.
- **Porongos**: Um massacre sangrento durante a rendição final das forças farrapas, protagonizado pela escrava Zacimba Gala e seus companheiros, simbolizando união nacional mesmo diante da adversidade extrema.
4. **Consequências Pós-Guerra (1868–1883)**:
- Os remanescentes insurretos fundaram duas novas províncias republicanas: República Juliana (1918) e Província Independente do Rio Grande do Sul (1920).
- O governo central retomou parcialmente suas possessões sulinas mediante negociação diplomática culminando no Tratado de Ponche Verde em 1845.
Esses pontos representam apenas parte significativa desses acontecimentos históricos complexos. Espero que isso ajude você a entender melhor a contextura desta importante luta regional brasileira!<|im_end|>
---
</details>
Below, we compare the performance of Tucano2-qwen-0.5B-Think with Qwen3-0.6B, a strong baseline in the 0.5B parameter range. The percentages represent the absolute difference in performance between the two models on each benchmark. All other plots can be found in the [.plots](https://huggingface.co/Polygl0t/Tucano2-qwen-0.5B-Think/tree/main/.plots/) folder.
**Tucano2-qwen-0.5B-Think vs Qwen3-0.6B**
![Performance Comparison](./.plots/model_comparison.png)
## Cite as 🤗
```bibtex
@misc{correa2026tucano2cool,
title={{Tucano 2 Cool: Better Open Source LLMs for Portuguese}},
author={Nicholas Kluge Corr{\^e}a and Aniket Sen and Shiza Fatimah and Sophia Falk and Lennard Landgraf and Julia Kastner and Lucie Flek},
year={2026},
eprint={2603.03543},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2603.03543},
}
```
## Acknowledgments
Polyglot is a project funded by the Federal Ministry of Education and Research (BMBF) and the Ministry of Culture and Science of the State of North Rhine-Westphalia (MWK) as part of TRA Sustainable Futures (University of Bonn) and the Excellence Strategy of the federal and state governments.
We also gratefully acknowledge the granted access to the [Marvin cluster](https://www.hpc.uni-bonn.de/en/systems/marvin) hosted by [University of Bonn](https://www.uni-bonn.de/en) along with the support provided by its High Performance Computing & Analytics Lab.
## License
Tucano2-qwen-0.5B-Think is licensed under the Apache License, Version 2.0. For more details, see the [LICENSE](LICENSE) file.

114
chat_template.jinja Normal file
View File

@@ -0,0 +1,114 @@
{#- Chat template: renders `messages` (plus optional `tools`) as a sequence of <|im_start|>role ... <|im_end|> turns. #}
{#- Variables read: `messages`, `tools`, `add_generation_prompt`. #}
{#- Handle tool/function calling setup #}
{%- if tools %}
{{- '<|im_start|>system\n' }}
{#- Include system message if present #}
{%- if messages[0].role == 'system' %}
{{- messages[0].content + '\n\n' }}
{%- endif %}
{#- Add tool calling instructions in Portuguese #}
{{- "# Tools / Ferramentas\n\nVocê pode chamar uma ou mais funções para auxiliar na consulta do usuário.\n\nVocê recebe assinaturas de funções dentro de tags XML <tools></tools>:\n<tools>" }}
{#- One JSON-serialized tool signature per line inside <tools> #}
{%- for tool in tools %}
{{- "\n" }}
{{- tool | tojson }}
{%- endfor %}
{{- "\n</tools>\n\nPara cada chamada de função, retorne um objeto json com o nome da função e os argumentos dentro das tags XML <tool_call></tool_call>:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
{#- Standard system message without tools #}
{%- if messages[0].role == 'system' %}
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{#- Detect multi-step tool usage by finding the last real user query #}
{#- A "real" query is a user message with string content that is not entirely wrapped in <tool_response>...</tool_response>. #}
{#- ns.last_query_index defaults to the last message index and is overwritten by the first match found while walking backwards. #}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
{#- Map the position in the reversed loop back to an index into `messages` #}
{%- set index = (messages|length - 1) - loop.index0 %}
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
{%- set ns.multi_step_tool = false %}
{%- set ns.last_query_index = index %}
{%- endif %}
{%- endfor %}
{#- Process each message in the conversation #}
{%- for message in messages %}
{#- Normalize content to string (non-string content is rendered as an empty string) #}
{%- if message.content is string %}
{%- set content = message.content %}
{%- else %}
{%- set content = '' %}
{%- endif %}
{#- Handle user messages and non-first system messages #}
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
{#- Handle assistant messages with reasoning #}
{%- elif message.role == "assistant" %}
{#- Extract reasoning content if present: an explicit string `message.reasoning_content` takes precedence #}
{%- set reasoning_content = '' %}
{%- if message.reasoning_content is string %}
{%- set reasoning_content = message.reasoning_content %}
{%- else %}
{#- Parse <think></think> tags from content: text before the first </think> (after the last <think>) is the reasoning, text after the last </think> is the answer #}
{%- if '</think>' in content %}
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
{%- endif %}
{%- endif %}
{#- NOTE(review): the generation / endgeneration tags below are the only tags without '-' whitespace control, so the line break preceding each of them (after the role header and after <|im_end|>) appears to be emitted as literal output; confirm against the rendering environment before reformatting these lines. #}
{#- NOTE(review): `generation` is not a core Jinja tag; presumably supplied by the host library to mark assistant-produced spans; confirm. #}
{{- '<|im_start|>' + message.role }}
{% generation %}
{#- Add reasoning tags for messages after last user query: always for the final message, otherwise only when reasoning is non-empty; earlier assistant turns render content only #}
{%- if loop.index0 > ns.last_query_index %}
{%- if loop.last or (not loop.last and reasoning_content) %}
{{- '<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
{%- else %}
{{- content }}
{%- endif %}
{%- else %}
{{- content }}
{%- endif %}
{#- Add tool calls if present #}
{%- if message.tool_calls %}
{%- for tool_call in message.tool_calls %}
{#- Newline separator before every tool call, except before the first one when the message has no content #}
{%- if (loop.first and content) or (not loop.first) %}
{{- '\n' }}
{%- endif %}
{#- Normalize tool call format: unwrap a `function` sub-object when present so name/arguments are read from one place #}
{%- if tool_call.function %}
{%- set tool_call = tool_call.function %}
{%- endif %}
{{- '<tool_call>\n{"name": "' }}
{{- tool_call.name }}
{{- '", "arguments": ' }}
{#- String arguments are emitted verbatim (assumed to already be serialized JSON; TODO confirm); anything else goes through tojson #}
{%- if tool_call.arguments is string %}
{{- tool_call.arguments }}
{%- else %}
{{- tool_call.arguments | tojson }}
{%- endif %}
{{- '}\n</tool_call>' }}
{%- endfor %}
{%- endif %}
{{- '<|im_end|>' }}
{% endgeneration %}
{#- Handle tool response messages #}
{%- elif message.role == "tool" %}
{#- Group consecutive tool responses under one user message: the header is opened only for the first tool message of a run #}
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
{{- '<|im_start|>user' }}
{%- endif %}
{{- '\n<tool_response>\n' }}
{{- content }}
{{- '\n</tool_response>' }}
{#- Close the grouped user turn after the last tool message of a run #}
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
{{- '<|im_end|>\n' }}
{%- endif %}
{%- endif %}
{%- endfor %}
{#- Add generation prompt if requested #}
{%- if add_generation_prompt %}
{{- '<|im_start|>assistant\n' }}
{%- endif %}

61
config.json Normal file
View File

@@ -0,0 +1,61 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"dtype": "bfloat16",
"eos_token_id": 2,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 4096,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 49109,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000,
"sliding_window": null,
"tie_word_embeddings": true,
"transformers_version": "4.57.3",
"use_cache": false,
"use_sliding_window": false,
"vocab_size": 49152
}

207
evals.yaml Normal file
View File

@@ -0,0 +1,207 @@
evaluations:
arc_challenge_poly_pt_acc: 0.30427350427350425
arc_challenge_poly_pt_acc_norm: 0.32735042735042735
arc_challenge_poly_pt_acc_norm_stderr: 0.013724408490743929
arc_challenge_poly_pt_acc_stderr: 0.013456870841978025
arc_challenge_poly_pt_alias: arc_challenge_poly_pt
assin2_rte_acc,all: 0.5
assin2_rte_acc_stderr,all: 0.007138073526203373
assin2_rte_alias: assin2_rte
assin2_rte_f1_macro,all: 0.3333333333333333
assin2_rte_f1_macro_stderr,all: 0.003173025139437921
assin2_sts_alias: assin2_sts
assin2_sts_mse,all: 3.059456699346405
assin2_sts_mse_stderr,all: N/A
assin2_sts_pearson,all: 0.0276213074062244
assin2_sts_pearson_stderr,all: 0.011665375504884477
assin_entailment_acc: 0.7335
assin_entailment_acc_stderr: 0.006991541883910775
assin_entailment_alias: assin_entailment
assin_paraphrase_acc: 0.71575
assin_paraphrase_acc_stderr: 0.0071327206100355
assin_paraphrase_alias: assin_paraphrase
belebele_por_Latn_acc: 0.3611111111111111
belebele_por_Latn_acc_norm: 0.3611111111111111
belebele_por_Latn_acc_norm_stderr: 0.016019658270537297
belebele_por_Latn_acc_stderr: 0.016019658270537297
belebele_por_Latn_alias: belebele_por_Latn
bluex_acc,all: 0.34492350486787204
bluex_acc,exam_id__UNICAMP_2018: 0.4074074074074074
bluex_acc,exam_id__UNICAMP_2019: 0.28
bluex_acc,exam_id__UNICAMP_2020: 0.36363636363636365
bluex_acc,exam_id__UNICAMP_2021_1: 0.2391304347826087
bluex_acc,exam_id__UNICAMP_2021_2: 0.23529411764705882
bluex_acc,exam_id__UNICAMP_2022: 0.2564102564102564
bluex_acc,exam_id__UNICAMP_2023: 0.5581395348837209
bluex_acc,exam_id__UNICAMP_2024: 0.4
bluex_acc,exam_id__USP_2018: 0.3148148148148148
bluex_acc,exam_id__USP_2019: 0.3
bluex_acc,exam_id__USP_2020: 0.30357142857142855
bluex_acc,exam_id__USP_2021: 0.36538461538461536
bluex_acc,exam_id__USP_2022: 0.3673469387755102
bluex_acc,exam_id__USP_2023: 0.38636363636363635
bluex_acc,exam_id__USP_2024: 0.4146341463414634
bluex_acc_stderr,all: 0.010242775047627772
bluex_acc_stderr,exam_id__UNICAMP_2018: 0.038534741152469736
bluex_acc_stderr,exam_id__UNICAMP_2019: 0.0367961057296462
bluex_acc_stderr,exam_id__UNICAMP_2020: 0.037368794971719924
bluex_acc_stderr,exam_id__UNICAMP_2021_1: 0.036222183210060134
bluex_acc_stderr,exam_id__UNICAMP_2021_2: 0.034079376824662555
bluex_acc_stderr,exam_id__UNICAMP_2022: 0.04027891922030821
bluex_acc_stderr,exam_id__UNICAMP_2023: 0.04379564728920204
bluex_acc_stderr,exam_id__UNICAMP_2024: 0.04209744896628822
bluex_acc_stderr,exam_id__USP_2018: 0.03647519527201769
bluex_acc_stderr,exam_id__USP_2019: 0.04181463130976304
bluex_acc_stderr,exam_id__USP_2020: 0.03539764734011866
bluex_acc_stderr,exam_id__USP_2021: 0.03859991685452418
bluex_acc_stderr,exam_id__USP_2022: 0.0397939306907615
bluex_acc_stderr,exam_id__USP_2023: 0.042529993265009204
bluex_acc_stderr,exam_id__USP_2024: 0.04456715995150719
bluex_alias: bluex
calame_pt_acc: 0.09489402697495183
calame_pt_acc_stderr: 0.006433689590297553
calame_pt_alias: calame_pt
calame_pt_perplexity: 8435.632130447184
calame_pt_perplexity_stderr: 963.9665411064852
enem_challenge_acc,all: 0.3198040587823653
enem_challenge_acc,exam_id__2009: 0.3130434782608696
enem_challenge_acc,exam_id__2010: 0.38461538461538464
enem_challenge_acc,exam_id__2011: 0.29914529914529914
enem_challenge_acc,exam_id__2012: 0.31896551724137934
enem_challenge_acc,exam_id__2013: 0.37037037037037035
enem_challenge_acc,exam_id__2014: 0.3394495412844037
enem_challenge_acc,exam_id__2015: 0.3025210084033613
enem_challenge_acc,exam_id__2016: 0.3140495867768595
enem_challenge_acc,exam_id__2016_2: 0.2764227642276423
enem_challenge_acc,exam_id__2017: 0.25
enem_challenge_acc,exam_id__2022: 0.3233082706766917
enem_challenge_acc,exam_id__2023: 0.34814814814814815
enem_challenge_acc_stderr,all: 0.007148951828535609
enem_challenge_acc_stderr,exam_id__2009: 0.024960208775102345
enem_challenge_acc_stderr,exam_id__2010: 0.025924496341894532
enem_challenge_acc_stderr,exam_id__2011: 0.02449152244622838
enem_challenge_acc_stderr,exam_id__2012: 0.024889831032253663
enem_challenge_acc_stderr,exam_id__2013: 0.02682596737556379
enem_challenge_acc_stderr,exam_id__2014: 0.026210250439613336
enem_challenge_acc_stderr,exam_id__2015: 0.02434000254473757
enem_challenge_acc_stderr,exam_id__2016: 0.024414986986171384
enem_challenge_acc_stderr,exam_id__2016_2: 0.023342214704782527
enem_challenge_acc_stderr,exam_id__2017: 0.02314953920904528
enem_challenge_acc_stderr,exam_id__2022: 0.023367019181975722
enem_challenge_acc_stderr,exam_id__2023: 0.023749430869741607
enem_challenge_alias: enem
faquad_nli_acc,all: 0.7846153846153846
faquad_nli_acc_stderr,all: 0.011396120309131366
faquad_nli_alias: faquad_nli
faquad_nli_f1_macro,all: 0.4396551724137931
faquad_nli_f1_macro_stderr,all: 0.00357969847290883
global_piqa_completions_por_latn_braz_acc: 0.7
global_piqa_completions_por_latn_braz_acc_bytes: 0.69
global_piqa_completions_por_latn_braz_acc_bytes_stderr: 0.046482319871173176
global_piqa_completions_por_latn_braz_acc_norm: 0.68
global_piqa_completions_por_latn_braz_acc_norm_stderr: 0.046882617226215076
global_piqa_completions_por_latn_braz_acc_stderr: 0.04605661864718383
global_piqa_completions_por_latn_braz_alias: global_piqa_completions_por_latn_braz
gsm8k_pt_alias: gsm8k_pt
gsm8k_pt_exact_match,flexible-extract: 0.1461187214611872
gsm8k_pt_exact_match,strict-match: 0.0
gsm8k_pt_exact_match_stderr,flexible-extract: 0.009748085159664667
gsm8k_pt_exact_match_stderr,strict-match: 0.0
hatebr_offensive_acc,all: 0.5328571428571428
hatebr_offensive_acc_stderr,all: 0.009452579861644791
hatebr_offensive_alias: hatebr_offensive_binary
hatebr_offensive_f1_macro,all: 0.4054916057180851
hatebr_offensive_f1_macro_stderr,all: 0.007877082274553134
hellaswag_poly_pt_acc: 0.3781558131975295
hellaswag_poly_pt_acc_norm: 0.47209881894029687
hellaswag_poly_pt_acc_norm_stderr: 0.005196835630828483
hellaswag_poly_pt_acc_stderr: 0.005048035343611143
hellaswag_poly_pt_alias: hellaswag_poly_pt
humaneval_instruct_alias: humaneval_instruct
humaneval_instruct_pass@1,create_test: 0.0
humaneval_instruct_pass@1_stderr,create_test: 0.0
ifeval_pt_alias: ifeval_pt
ifeval_pt_inst_level_loose_acc: 0.3930232558139535
ifeval_pt_inst_level_loose_acc_stderr: N/A
ifeval_pt_inst_level_strict_acc: 0.3302325581395349
ifeval_pt_inst_level_strict_acc_stderr: N/A
ifeval_pt_prompt_level_loose_acc: 0.27666666666666667
ifeval_pt_prompt_level_loose_acc_stderr: 0.025870931391123536
ifeval_pt_prompt_level_strict_acc: 0.21
ifeval_pt_prompt_level_strict_acc_stderr: 0.023555243542102446
lambada_poly_pt_acc: 0.20861633999611875
lambada_poly_pt_acc_stderr: 0.005660825573438258
lambada_poly_pt_alias: lambada_poly_pt
lambada_poly_pt_perplexity: 861.6919525698232
lambada_poly_pt_perplexity_stderr: 63.082803555905926
mmlu_poly_pt_acc: 0.3607775442809967
mmlu_poly_pt_acc_stderr: 0.004160492531275285
mmlu_poly_pt_alias: mmlu_poly_pt
oab_exams_acc,all: 0.27015945330296126
oab_exams_acc,exam_id__2010-01: 0.24705882352941178
oab_exams_acc,exam_id__2010-02: 0.28
oab_exams_acc,exam_id__2011-03: 0.24242424242424243
oab_exams_acc,exam_id__2011-04: 0.225
oab_exams_acc,exam_id__2011-05: 0.325
oab_exams_acc,exam_id__2012-06: 0.3
oab_exams_acc,exam_id__2012-06a: 0.2125
oab_exams_acc,exam_id__2012-07: 0.3125
oab_exams_acc,exam_id__2012-08: 0.1375
oab_exams_acc,exam_id__2012-09: 0.22077922077922077
oab_exams_acc,exam_id__2013-10: 0.2375
oab_exams_acc,exam_id__2013-11: 0.275
oab_exams_acc,exam_id__2013-12: 0.2375
oab_exams_acc,exam_id__2014-13: 0.2875
oab_exams_acc,exam_id__2014-14: 0.25
oab_exams_acc,exam_id__2014-15: 0.21794871794871795
oab_exams_acc,exam_id__2015-16: 0.2875
oab_exams_acc,exam_id__2015-17: 0.3333333333333333
oab_exams_acc,exam_id__2015-18: 0.275
oab_exams_acc,exam_id__2016-19: 0.2692307692307692
oab_exams_acc,exam_id__2016-20: 0.3625
oab_exams_acc,exam_id__2016-20a: 0.275
oab_exams_acc,exam_id__2016-21: 0.3
oab_exams_acc,exam_id__2017-22: 0.3375
oab_exams_acc,exam_id__2017-23: 0.3
oab_exams_acc,exam_id__2017-24: 0.3
oab_exams_acc,exam_id__2018-25: 0.25
oab_exams_acc_stderr,all: 0.0054708493847973475
oab_exams_acc_stderr,exam_id__2010-01: 0.027072546734021444
oab_exams_acc_stderr,exam_id__2010-02: 0.025924135072639538
oab_exams_acc_stderr,exam_id__2011-03: 0.024845779837305654
oab_exams_acc_stderr,exam_id__2011-04: 0.026955276691079964
oab_exams_acc_stderr,exam_id__2011-05: 0.03024661215761147
oab_exams_acc_stderr,exam_id__2012-06: 0.02951246277176116
oab_exams_acc_stderr,exam_id__2012-06a: 0.026389734380509827
oab_exams_acc_stderr,exam_id__2012-07: 0.029841237684623958
oab_exams_acc_stderr,exam_id__2012-08: 0.0221709705988969
oab_exams_acc_stderr,exam_id__2012-09: 0.027302999732294067
oab_exams_acc_stderr,exam_id__2013-10: 0.0274908309628061
oab_exams_acc_stderr,exam_id__2013-11: 0.02882257223036154
oab_exams_acc_stderr,exam_id__2013-12: 0.027413917931195794
oab_exams_acc_stderr,exam_id__2014-13: 0.02917450516650155
oab_exams_acc_stderr,exam_id__2014-14: 0.027979401755101777
oab_exams_acc_stderr,exam_id__2014-15: 0.027011736935613068
oab_exams_acc_stderr,exam_id__2015-16: 0.029203723510899393
oab_exams_acc_stderr,exam_id__2015-17: 0.030881077024925714
oab_exams_acc_stderr,exam_id__2015-18: 0.0288121979710083
oab_exams_acc_stderr,exam_id__2016-19: 0.029017747344098694
oab_exams_acc_stderr,exam_id__2016-20: 0.030947316988827907
oab_exams_acc_stderr,exam_id__2016-20a: 0.02885616149691637
oab_exams_acc_stderr,exam_id__2016-21: 0.029466001599879478
oab_exams_acc_stderr,exam_id__2017-22: 0.0304433823531397
oab_exams_acc_stderr,exam_id__2017-23: 0.0295029507416334
oab_exams_acc_stderr,exam_id__2017-24: 0.029614631804279756
oab_exams_acc_stderr,exam_id__2018-25: 0.028064299041816978
oab_exams_alias: oab_exams
portuguese_hate_speech_acc,all: 0.30552291421856637
portuguese_hate_speech_acc_stderr,all: 0.0111342367647186
portuguese_hate_speech_alias: portuguese_hate_speech_binary
portuguese_hate_speech_f1_macro,all: 0.24331655796529078
portuguese_hate_speech_f1_macro_stderr,all: 0.007523479961403228
tweetsentbr_acc,all: 0.34626865671641793
tweetsentbr_acc_stderr,all: 0.007507813204647027
tweetsentbr_alias: tweetsentbr
tweetsentbr_f1_macro,all: 0.24098176805099736
tweetsentbr_f1_macro_stderr,all: 0.005874676993024155
step: 3595

13
generation_config.json Normal file
View File

@@ -0,0 +1,13 @@
{
"bos_token_id": 1,
"do_sample": true,
"eos_token_id": [
2
],
"max_new_tokens": 1024,
"pad_token_id": 49109,
"renormalize_logits": true,
"repetition_penalty": 1.2,
"temperature": 0.1,
"transformers_version": "4.57.3"
}

3
logo.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1856d91c3b35390cee5122902d94044657c67df7034ca4005316275c404fc8a0
size 197189

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e83956aec017fc069d60714d38ee9595ad35e9d242c722c865861340bbf1ebf8
size 981633328

82
ruler.yaml Normal file
View File

@@ -0,0 +1,82 @@
model_name: Tucano2-qwen-0.5B-Think
results:
niah_pt_multikey_1_1024: 0.706
niah_pt_multikey_1_1024_stderr: 0.02039509548493655
niah_pt_multikey_1_2048: 0.67
niah_pt_multikey_1_2048_stderr: 0.021049612166134782
niah_pt_multikey_1_4096: 0.542
niah_pt_multikey_1_4096_stderr: N/A
niah_pt_multikey_1_alias: " - niah_pt_multikey_1"
niah_pt_multikey_2_1024: 0.48
niah_pt_multikey_2_1024_stderr: 0.022365160424231326
niah_pt_multikey_2_2048: 0.238
niah_pt_multikey_2_2048_stderr: 0.019064072958198387
niah_pt_multikey_2_4096: 0.062
niah_pt_multikey_2_4096_stderr: N/A
niah_pt_multikey_2_alias: " - niah_pt_multikey_2"
niah_pt_multikey_3_1024: 0.486
niah_pt_multikey_3_1024_stderr: 0.022374298166353144
niah_pt_multikey_3_2048: 0.31
niah_pt_multikey_3_2048_stderr: 0.020704041021724684
niah_pt_multikey_3_4096: 0.184
niah_pt_multikey_3_4096_stderr: N/A
niah_pt_multikey_3_alias: " - niah_pt_multikey_3"
niah_pt_multiquery_1024: 0.531
niah_pt_multiquery_1024_stderr: 0.013691344193015646
niah_pt_multiquery_2048: 0.4915
niah_pt_multiquery_2048_stderr: 0.014053487147395266
niah_pt_multiquery_4096: 0.4215
niah_pt_multiquery_4096_stderr: N/A
niah_pt_multiquery_alias: " - niah_pt_multiquery"
niah_pt_multivalue_1024: 0.4995
niah_pt_multivalue_1024_stderr: 0.013476376569794338
niah_pt_multivalue_2048: 0.519
niah_pt_multivalue_2048_stderr: 0.013327913059930505
niah_pt_multivalue_4096: 0.4545
niah_pt_multivalue_4096_stderr: N/A
niah_pt_multivalue_alias: " - niah_pt_multivalue"
niah_pt_single_1_1024: 0.82
niah_pt_single_1_1024_stderr: 0.017198592476314233
niah_pt_single_1_2048: 0.816
niah_pt_single_1_2048_stderr: 0.017346174781752842
niah_pt_single_1_4096: 0.8
niah_pt_single_1_4096_stderr: N/A
niah_pt_single_1_alias: " - niah_pt_single_1"
niah_pt_single_2_1024: 0.778
niah_pt_single_2_1024_stderr: 0.018604414758250098
niah_pt_single_2_2048: 0.772
niah_pt_single_2_2048_stderr: 0.018781306529363172
niah_pt_single_2_4096: 0.688
niah_pt_single_2_4096_stderr: N/A
niah_pt_single_2_alias: " - niah_pt_single_2"
niah_pt_single_3_1024: 0.468
niah_pt_single_3_1024_stderr: 0.022337186479044296
niah_pt_single_3_2048: 0.508
niah_pt_single_3_2048_stderr: 0.022380208834928014
niah_pt_single_3_4096: 0.5
niah_pt_single_3_4096_stderr: N/A
niah_pt_single_3_alias: " - niah_pt_single_3"
ruler_pt_4096: 0.44008484848484847
ruler_pt_4096_stderr: N/A
ruler_pt_alias: ruler_pt
ruler_pt_cwe_1024: 0.2516
ruler_pt_cwe_1024_stderr: 0.0065879953982022075
ruler_pt_cwe_2048: 0.10560000000000001
ruler_pt_cwe_2048_stderr: 0.0046626989526502875
ruler_pt_cwe_4096: 0.268
ruler_pt_cwe_4096_stderr: N/A
ruler_pt_cwe_alias: " - ruler_pt_cwe"
ruler_pt_fwe_1024: 0.7766666666666666
ruler_pt_fwe_1024_stderr: 0.010771818051204566
ruler_pt_fwe_2048: 0.644
ruler_pt_fwe_2048_stderr: 0.010544896116732008
ruler_pt_fwe_4096: 0.5413333333333332
ruler_pt_fwe_4096_stderr: N/A
ruler_pt_fwe_alias: " - ruler_pt_fwe"
ruler_pt_vt_1024: 0.8336
ruler_pt_vt_1024_stderr: 0.01194434656352784
ruler_pt_vt_2048: 0.4344
ruler_pt_vt_2048_stderr: 0.014185758756689964
ruler_pt_vt_4096: 0.37960000000000005
ruler_pt_vt_4096_stderr: N/A
ruler_pt_vt_alias: " - ruler_pt_vt"

30
special_tokens_map.json Normal file
View File

@@ -0,0 +1,30 @@
{
"bos_token": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<|unk|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

463711
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

397
tokenizer_config.json Normal file
View File

@@ -0,0 +1,397 @@
{
"add_bos_token": false,
"add_eos_token": false,
"add_prefix_space": null,
"added_tokens_decoder": {
"0": {
"content": "<|unk|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49109": {
"content": "<|pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49110": {
"content": "<tools>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49111": {
"content": "</tools>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49112": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49113": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49114": {
"content": "<tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49115": {
"content": "</tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49116": {
"content": "<think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49117": {
"content": "</think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49118": {
"content": "<answer>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49119": {
"content": "</answer>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49120": {
"content": "<context>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49121": {
"content": "</context>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49122": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49123": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49124": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49125": {
"content": "<|repo_name|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49126": {
"content": "<|image|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49127": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49128": {
"content": "<|image_placeholder|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49129": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49130": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49131": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49132": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49133": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49134": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49135": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49136": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49137": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49138": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49139": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49140": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49141": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49142": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49143": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49144": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49145": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49146": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49147": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49148": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49149": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49150": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49151": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<|im_start|>",
"bos_token_id": 1,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"eos_token_id": 2,
"extra_special_tokens": {},
"legacy": false,
"model_input_names": [
"input_ids",
"attention_mask"
],
"model_max_length": 4096,
"pad_token": "<|pad|>",
"pad_token_id": 49109,
"padding_side": "right",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "PreTrainedTokenizerFast",
"truncation_side": "right",
"unk_token": "<|unk|>",
"unk_token_id": 0,
"use_default_system_prompt": false
}

3
train_logs_apo.parquet Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:33c92a3850b512684a41eff4d9c336713270ed3158857d848cdc2db4df483f70
size 46849

3
train_logs_sft.parquet Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c9f244d76ed1cabcccae0ac9461aa0c5313464cda19aaf67603a665a05d7d677
size 67160

98
training_config_apo.yaml Normal file
View File

@@ -0,0 +1,98 @@
# Directory settings
checkpoint_dir: "/polyglot/portuguese/checkpoints/models/Tucano2-qwen-0.5B-Think"
train_dataset_dir:
# Total: 13,649 samples (x5 epochs)
# Harmful samples (with reasoning): 4,008 samples
- /polyglot/portuguese/gigaverbo-v2-dpo/harmfull-reasoning
# Harmless samples (with reasoning): 9,641 samples
- /polyglot/portuguese/gigaverbo-v2-dpo/harmless-reasoning
val_dataset_dir: null
dataset_type: "jsonl"
cache_dir: "/lustre/mlnvme/data/polyglot/.cache"
# Data loading settings
pin_memory: true
num_workers_for_dataloader: 16
shuffle_dataset: true
mask_eos_token: false
mask_pad_token: false
# Model architecture settings
vocab_size: 49152
num_hidden_layers: 28
num_attention_heads: 16
num_key_value_heads: 8
head_dim: 128
hidden_size: 1024
intermediate_size: 3072
max_position_embeddings: 4096
tie_word_embeddings: true
hidden_act: "silu"
output_hidden_states: false
attn_implementation: "flash_attention_2"
use_cache: false
no_rope_layer_interval: null
rope_theta: 1000000.0
rope_scale_factor: null
rms_norm_eps: 0.000001
# Training settings
total_batch_size: 524288
micro_batch_size: 4
gradient_accumulation_steps: 4
eval_micro_batch_size: null
num_train_epochs: 5
warmup_ratio: 0.1
max_learning_rate: 0.000005
min_learning_rate: 0.0
muon_learning_rate: null
weight_decay: 0.0
beta1: 0.9
beta2: 0.95
eps: 0.00000001
lr_decay_type: "cosine"
use_sqrt: false
lr_decay_iters_coef: 1.
seed: 42
max_steps: 535
max_grad_norm: 1.0
# APO settings
loss_type: "apo_zero"
dpo_beta: 0.5
precompute_ref_log_probs: true
truncation_mode: "keep_end"
# Precision and optimization settings
torch_compile: false
mat_mul_precision: "highest"
tf32: true
bf16: true
gradient_checkpointing: false
use_liger_kernel: false
static_graph: false
# Hub settings
push_to_hub: false
hub_token: null
hub_model_id: null
# Tokenizer and Reference model
tokenizer_name_or_path: "/polyglot/portuguese/checkpoints/models/Tucano2-qwen-0.5B-Think-SFT"
chat_template_path: null
reference_model: "/polyglot/portuguese/checkpoints/models/Tucano2-qwen-0.5B-Think-SFT"
continual_pretraining: true
# Checkpoint settings
resume_from_checkpoint: null
checkpointing_steps: 1000
begin_new_stage: true
stage_name: "single_cosine"
# Miscellaneous settings
sanity_check: false
sanity_check_num_samples: 100000
wandb_token: null
wandb_id: "tucano2-qwen-0.5b-think-apo"
wandb_project: "Polyglot"
wandb_desc: "Developing LLMs for low-resource languages"

93
training_config_sft.yaml Normal file
View File

@@ -0,0 +1,93 @@
# Directory settings
checkpoint_dir: "/polyglot/portuguese/checkpoints/models/Tucano2-qwen-0.5B-Think-SFT"
train_dataset_dir:
# Reasoning: ~34 million tokens (x5 epochs)
- /polyglot/portuguese/gigaverbo-v2-sft/reasoning
val_dataset_dir: null
dataset_type: "jsonl"
cache_dir: "/lustre/mlnvme/data/polyglot/.cache"
# Data loading settings
pin_memory: true
num_workers_for_dataloader: 16
shuffle_dataset: true
mask_eos_token: false
mask_pad_token: true
# Model architecture settings
vocab_size: 49152
num_hidden_layers: 28
num_attention_heads: 16
num_key_value_heads: 8
head_dim: 128
hidden_size: 1024
intermediate_size: 3072
max_position_embeddings: 4096
tie_word_embeddings: true
hidden_act: "silu"
output_hidden_states: false
attn_implementation: "flash_attention_2"
use_cache: false
no_rope_layer_interval: null
rope_theta: 1000000.0
rope_scale_factor: null
rms_norm_eps: 0.000001
# Training settings
total_batch_size: 524288
micro_batch_size: 4
gradient_accumulation_steps: 4
eval_micro_batch_size: null
num_train_epochs: 5
warmup_ratio: 0.1
max_learning_rate: 0.000085
min_learning_rate: 0.0
muon_learning_rate: null
weight_decay: 0.0
beta1: 0.9
beta2: 0.95
eps: 0.00000001
lr_decay_type: "cosine"
use_sqrt: false
lr_decay_iters_coef: 1.
seed: 42
max_steps: 3060
max_grad_norm: 1.0
# SFT settings
packing: false
assistant_only_loss: true
# Precision and optimization settings
torch_compile: false
mat_mul_precision: "highest"
tf32: true
bf16: true
gradient_checkpointing: false
use_liger_kernel: true
static_graph: false
# Hub settings
push_to_hub: false
hub_token: null
hub_model_id: null
# Tokenizer and Reference model
tokenizer_name_or_path: "Polygl0t/Tucano2-qwen-0.5B-Base"
chat_template_path: null
reference_model: "Polygl0t/Tucano2-qwen-0.5B-Base"
continual_pretraining: true
# Checkpoint settings
resume_from_checkpoint: null
checkpointing_steps: 1000
begin_new_stage: true
stage_name: "single_cosine"
# Miscellaneous settings
sanity_check: false
sanity_check_num_samples: 100000
wandb_token: null
wandb_id: "tucano2-qwen-0.5b-think-sft"
wandb_project: "Polyglot"
wandb_desc: "Developing LLMs for low-resource languages"