初始化项目,由ModelHub XC社区提供模型

Model: Polygl0t/Tucano2-qwen-0.5B-Base
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-05-31 00:50:30 +08:00
commit 5dcf73b2aa
34 changed files with 465459 additions and 0 deletions

63
.gitattributes vendored Normal file
View File

@@ -0,0 +1,63 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
learning_curve.png filter=lfs diff=lfs merge=lfs -text
gradient_norm.png filter=lfs diff=lfs merge=lfs -text
benchmarks_easy.png filter=lfs diff=lfs merge=lfs -text
benchmarks_hard.png filter=lfs diff=lfs merge=lfs -text
performance_vs_compute.png filter=lfs diff=lfs merge=lfs -text
logo.png filter=lfs diff=lfs merge=lfs -text
npm_hard.png filter=lfs diff=lfs merge=lfs -text
npm_easy.png filter=lfs diff=lfs merge=lfs -text
learning_rate_sweep_loss_curves.png filter=lfs diff=lfs merge=lfs -text
tucano2_qwen_benchmarks_easy.png filter=lfs diff=lfs merge=lfs -text
tucano2_qwen_benchmarks_hard.png filter=lfs diff=lfs merge=lfs -text
learning_rate_sweep_gradient_norm.png filter=lfs diff=lfs merge=lfs -text
.plots/arc_challenge.png filter=lfs diff=lfs merge=lfs -text
.plots/before_and_after.png filter=lfs diff=lfs merge=lfs -text
.plots/belebele.png filter=lfs diff=lfs merge=lfs -text
.plots/bluex.png filter=lfs diff=lfs merge=lfs -text
.plots/calame.png filter=lfs diff=lfs merge=lfs -text
.plots/enem.png filter=lfs diff=lfs merge=lfs -text
.plots/global_piqa.png filter=lfs diff=lfs merge=lfs -text
.plots/gradient_norm.png filter=lfs diff=lfs merge=lfs -text
.plots/hellaswag.png filter=lfs diff=lfs merge=lfs -text
.plots/lambada.png filter=lfs diff=lfs merge=lfs -text
.plots/learning_curve.png filter=lfs diff=lfs merge=lfs -text
.plots/mmlu.png filter=lfs diff=lfs merge=lfs -text
.plots/npm_easy.png filter=lfs diff=lfs merge=lfs -text
.plots/npm_hard.png filter=lfs diff=lfs merge=lfs -text
.plots/oab.png filter=lfs diff=lfs merge=lfs -text
.plots/performance_vs_compute.png filter=lfs diff=lfs merge=lfs -text

3
.plots/arc_challenge.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:06e57d452b8bd974faa869a80550ce2476a9fa1657fed1be73370212dbdd84b7
size 213487

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:193dfcf5245c52b93bb2d44ee9055a89cb4b8d1308b673981bf64520c71c0300
size 276126

3
.plots/belebele.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:19e27d83ef2f477331b48276a844f7f082637aa1d20cdeeb7d98606f34758551
size 217880

3
.plots/bluex.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4ddda133a2debf54b823f79198d7f32f891d5ddbd7cfd5ae818392234881cadc
size 198062

3
.plots/calame.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4cccaaa5ff197b65bd93718237c2d8bb3293edf9daa28c0e4155fd940d7a3487
size 187776

3
.plots/enem.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9b6ab4391443f2bc18d26051e6d910b37ed5a8e569b16e192dd875fbb8526b32
size 214997

3
.plots/global_piqa.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1dc7274731cffebd55d3c9db3eac8e67639e09f214949482ada9392c8c0b0838
size 212760

3
.plots/gradient_norm.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c9b51e2e82434351f48dffcda1f8af0ef245d8c31bd33472d6e2f6f19d994ef2
size 272609

3
.plots/hellaswag.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d09558e642ff84e353835418fc7aa79d2f1bc5ffde8f95263ce6a27c18a4861b
size 185265

3
.plots/lambada.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:097b920f9bc49e5a4f260929498ae041287ff582c1286921a1b64ccf372044a3
size 205781

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:932659ccf928ed476fb0c91feaf4dc79d506fad57af6d3305b3bd3a3c8ca3eef
size 222461

3
.plots/mmlu.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f1e9a7fb6e69bc9db2eb5d6275b16c45aef86659b6a481dad096f9a1a5e94299
size 207586

3
.plots/npm_easy.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:aa00597875dee77c6f3772a5d7f259b9f30d2f1095c75aa36d3986982ad17ac9
size 199478

3
.plots/npm_hard.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:131b9d2381f65b38afdb760b582fff15ccf056cd031198fc620ce7c5f18a625f
size 240324

3
.plots/oab.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:43ccc88149515fda94aad0e6ef247819440f79834d69456dcf5988394fb78d0f
size 219714

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c4a657d6d68d6f2f1fbf0d3750eac546a7ead054995a466497e0e31bfeedd49c
size 949327

190
LICENSE Normal file
View File

@@ -0,0 +1,190 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
Copyright Nicholas Kluge Corrêa, Shiza Fatimah, Aniket Sen, and Sophia Falk
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

495
README.md Normal file
View File

@@ -0,0 +1,495 @@
---
language:
- pt
license: apache-2.0
library_name: transformers
tags:
- text-generation-inference
datasets:
- Polygl0t/gigaverbo-v2
- Polygl0t/gigaverbo-v2-synth
metrics:
- perplexity
pipeline_tag: text-generation
widget:
- text: "A floresta da Amazônia é conhecida por sua"
example_title: Exemplo
- text: "Uma das coisas que Portugal, Angola, Brasil e Moçambique têm em comum é o"
example_title: Exemplo
- text: "O Carnaval do Rio de Janeiro é"
example_title: Exemplo
inference:
parameters:
repetition_penalty: 1.2
temperature: 0.1
top_k: 50
top_p: 1.0
max_new_tokens: 150
co2_eq_emissions:
emissions: 86000
source: CodeCarbon
training_type: pre-training
geographical_location: Germany
hardware_used: NVIDIA A100-SXM4-80GB
model-index:
- name: Tucano2-qwen-0.5B-Base
results:
- task:
type: text-generation
name: Text Generation
dataset:
name: ARC Challenge
type: Polygl0t/ARC-poly
split: test
args:
num_few_shot: 5
metrics:
- type: acc_norm
value: 37.44
name: Acc-norm
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: arc_challenge_poly_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: HellaSwag
type: Polygl0t/HellaSwag-poly
split: validation
args:
num_few_shot: 5
metrics:
- type: acc_norm
value: 48.43
name: Acc-norm
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: hellaswag_poly_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: Calame
type: Polygl0t/CALAME-PT
split: test
args:
num_few_shot: 5
metrics:
- type: acc
value: 58.67
name: Acc
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: calame_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: Lambada
type: Polygl0t/LAMBADA-poly
split: test
args:
num_few_shot: 5
metrics:
- type: acc
value: 45.14
name: Acc
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: lambada_poly_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: Global PIQA
type: mrlbenchmarks/global-piqa-nonparallel
split: test
args:
num_few_shot: 5
metrics:
- type: acc_norm
value: 74
name: Acc-norm
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: global_piqa_completions_por_latn_braz
- task:
type: text-generation
name: Text Generation
dataset:
name: MMLU
type: Polygl0t/MMLU-poly
split: test
args:
num_few_shot: 5
metrics:
- type: acc
value: 39.68
name: Acc
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: mmlu_poly_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: BELEBELE
type: facebook/belebele
split: test
args:
num_few_shot: 5
metrics:
- type: acc_norm
value: 53.89
name: Acc-norm
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: belebele_por_Latn
- task:
type: text-generation
name: Text Generation
dataset:
name: BLUEX
type: eduagarcia-temp/BLUEX_without_images
split: train
args:
num_few_shot: 3
metrics:
- type: acc
value: 46.87
name: Acc
source:
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
name: bluex
- task:
type: text-generation
name: Text Generation
dataset:
name: ENEM Challenge
type: eduagarcia/enem_challenge
split: train
args:
num_few_shot: 3
metrics:
- type: acc
value: 55.14
name: Acc
source:
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
name: enem_challenge
- task:
type: text-generation
name: Text Generation
dataset:
name: OAB Exams
type: eduagarcia/oab_exams
split: train
args:
num_few_shot: 3
metrics:
- type: acc
value: 40.36
name: Acc
source:
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
name: oab_exams
base_model: Qwen/Qwen3-0.6B-Base
---
# Tucano2-qwen-0.5B-Base
<img src="./logo.png" alt="An illustration of a Tucano bird showing vibrant colors like yellow, orange, blue, green, and black." height="200">
## Model Summary
**[Tucano2-qwen-0.5B-Base](https://huggingface.co/Polygl0t/Tucano2-qwen-0.5B-Base)** is a decoder-only transformer continually pretrained from [Qwen3-0.6B-Base](https://huggingface.co/Qwen/Qwen3-0.6B-Base). Tucano2 is part of the [Polygl0t](https://huggingface.co/Polygl0t) initiative, which aims to advance language models for low-resource languages.
Tucano2-qwen-0.5B-Base shares the same tokenizer as **[Tucano2-0.6B-Base](https://huggingface.co/Polygl0t/Tucano2-0.6B-Base)**. Token embedding transplantation via _Orthogonal Matching Pursuit_ was used to adapt Qwen3-0.6B-Base to be more sensitive to the lexical, morphological, and orthographic properties of Portuguese.
The model was continually pretrained on approximately 50 billion tokens and achieves state-of-the-art performance across several benchmarks designed to evaluate Portuguese language models. **All data, source code, and recipes used to develop the Tucano2 series are open and fully reproducible.**
## Details
- **Architecture:** a Transformer-based model ([`qwen3`](https://huggingface.co/docs/transformers/main/en/model_doc/qwen3))
- **Size:** 490,799,104 parameters
- **Context length:** 4,096 tokens
- **Dataset(s):**
- [Polygl0t/gigaverbo-v2](https://huggingface.co/datasets/Polygl0t/gigaverbo-v2)
- [Polygl0t/gigaverbo-v2-synth](https://huggingface.co/datasets/Polygl0t/gigaverbo-v2-synth)
- **Language(s):** Portuguese
- **Batch size:** 1,048,576 tokens
- **Number of steps:** 50,000
- **GPU:** 8 NVIDIA A100-SXM4-80GB
- **Training time:** ~59 hours
- **Emissions:** 86 kg CO2eq (Germany)
- **Total energy consumption:** 225 kWh
This repository has the [source code](https://github.com/Polygl0t/llm-foundry) used to train this model. The full configuration used for training is available in the following config file:
- Single stage (linear warmup with cosine decay): [training_config.yaml](training_config.yaml)
### Checkpoints
Checkpoints were saved every 2,500 steps, which equates to approximately 2.5 billion tokens. The main branch of this repository contains the final checkpoint saved at step 50000. All other checkpoints are available as separate branches. To load a specific checkpoint, you can use the following code snippet:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
model_id = "Polygl0t/Tucano2-qwen-0.5B-Base"
revision = "step-2500" # Change this to the desired checkpoint branch
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, revision=revision)
```
Alternatively, you can list all available revisions of the model with the following code snippet:
```python
from huggingface_hub import list_repo_refs
out = list_repo_refs("Polygl0t/Tucano2-qwen-0.5B-Base")
branches = [b.name for b in out.branches]
print(branches)
```
<details>
<summary><b>Learning Curves</b></summary>
![Learning Curves](./.plots/learning_curve.png)
This plot illustrates the evolution of model performance (measured by loss) as a function of training time, measured in tokens seen during training.
</details>
<details>
<summary><b>Gradient Norms (L2)</b></summary>
![Gradient Norms](./.plots/gradient_norm.png)
This plot illustrates the evolution of gradient norms as a function of training time, measured in tokens seen during training.
</details>
## Intended Uses
The primary intended use of Tucano2-qwen-0.5B-Base is to serve as a foundation for research and development involving Portuguese language modeling. Checkpoints saved during training are designed to provide a controlled setting for performing comparative experiments, specifically regarding the effects of continual pretraining on performance across currently available benchmarks. You may also fine-tune and adapt Tucano2-qwen-0.5B-Base for deployment if your use follows the Apache 2.0 license. If you decide to use Tucano2-qwen-0.5B-Base as a basis for your fine-tuned model, please conduct your own risk and bias assessment.
## Out-of-scope Use
- Tucano2-qwen-0.5B-Base is **not intended for deployment**. It is not an out-of-the-box product and should not be used for human-facing interactions.
- Tucano2-qwen-0.5B-Base is for **the Portuguese language only** and is unsuitable for text generation tasks in other languages.
- Tucano2-qwen-0.5B-Base has **not been fine-tuned** for downstream tasks.
## Basic usage
```python
from transformers import GenerationConfig, TextGenerationPipeline, AutoTokenizer, AutoModelForCausalLM
import torch
# Specify the model and tokenizer
model_id = "Polygl0t/Tucano2-qwen-0.5B-Base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Specify the generation parameters as you like
generation_config = GenerationConfig(
**{
"do_sample": True,
"max_new_tokens": 150,
"renormalize_logits": True,
"repetition_penalty": 1.2,
"temperature": 0.1,
"top_k": 50,
"top_p": 1.0,
"use_cache": True,
}
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
generator = TextGenerationPipeline(model=model, task="text-generation", tokenizer=tokenizer, device=device)
# Generate text
prompt = "# A floresta da Amazônia: um lugar de Magia\n\n"
completion = generator(prompt, generation_config=generation_config)
print(completion[0]['generated_text'])
```
## Limitations
Like almost all other language models trained on large text datasets scraped from the web, Tucano2-qwen-0.5B-Base exhibits behavior that does not make it an out-of-the-box solution for many real-world applications, especially those requiring factual, reliable, and nontoxic text generation. Tucano2-qwen-0.5B-Base is subject to the following:
- **Hallucinations:** Tucano2-qwen-0.5B-Base can produce content that can be mistaken as facts, but is misleading or entirely false, i.e., hallucinations.
- **Biases and Toxicity:** Tucano2-qwen-0.5B-Base inherits the social and historical stereotypes from the data used to train it. Given these biases, the model can produce toxic content, i.e., harmful, offensive, or detrimental to individuals, groups, or communities.
- **Language Limitations:** Tucano2-qwen-0.5B-Base is primarily designed to interact with Portuguese. Other languages might challenge its comprehension, leading to potential misinterpretations or errors in response.
- **Repetition and Verbosity:** Tucano2-qwen-0.5B-Base may get stuck in repetition loops (especially if the repetition penalty during generation is set to a low value) or produce verbose responses unrelated to the prompt it was given.
Hence, even though Tucano2-qwen-0.5B-Base is released under a permissive license, we urge users to perform their own risk analysis before using it for real-world applications.
## Evaluations
The table below compares the Tucano2 series against other base models of similar size. We divide our evaluations into two sets:
- **Easy Set**: CALAME, GlobalPIQA, LAMBADA, ARC-Challenge, HellaSwag
- **Hard Set**: ENEM, BLUEX, OAB Exams, BELEBELE, MMLU
The NPM (Normalized Performance Metric) provides a balanced view of model performance across tasks, accounting for each task's inherent difficulty by normalizing its evaluation score relative to its random baseline.
| | Total Avg. | Easy Set (NPM) | Hard Set (NPM) |
| -------------------------- | ---------- | -------------- | -------------- |
| **Tucano2-qwen-3.7B-Base** | 59.21 | 57.41 | 61 |
| Qwen2.5-7B | 57.97 | 54.12 | 61.83 |
| Qwen3-4B-Base | 57.86 | 52.52 | 63.2 |
| SmolLM3-3B-Base | 50.25 | 54.06 | 46.44 |
| Qwen2.5-3B | 50.16 | 47.69 | 52.62 |
| **Tucano2-qwen-1.5B-Base** | 47.9 | 47.97 | 47.82 |
| Curio-edu-7b | 45.66 | 57.46 | 33.87 |
| Qwen3-1.7B-Base | 44.48 | 40.94 | 48.03 |
| Curio-7b | 42.79 | 58.97 | 26.6 |
| Llama-3.2-3B | 40.5 | 43.79 | 37.21 |
| granite-3.3-2b-base | 39.97 | 45.31 | 34.63 |
| **Tucano2-qwen-0.5B-Base** | 35.36 | 39.93 | 30.79 |
| Qwen3-0.6B-Base | 29.4 | 26.41 | 32.38 |
| Llama-2-7b-hf | 29.36 | 42.69 | 16.03 |
| **Tucano2-0.6B-Base** | 20.64 | 40.28 | 0.99 |
| Qwen2.5-0.5B | 19.89 | 18.7 | 21.09 |
| Curio-1.1b | 19.23 | 39.16 | -0.69 |
| Tucano-2b4 | 17.88 | 33.55 | 2.2 |
| Curio-edu-1b1 | 17.72 | 34.77 | 0.67 |
| Llama-3.2-1B | 16.57 | 28.32 | 4.83 |
| Tucano-1b1 | 15.44 | 29.12 | 1.76 |
| Tucano-630m | 14.9 | 26.99 | 2.8 |
| Carvalho_pt-gl-1.3B | 12.54 | 26.75 | -1.66 |
| TeenyTinyLlama-460m | 11.18 | 19.65 | 2.72 |
| Tucano-160m | 8.78 | 19.12 | -1.56 |
| TeenyTinyLlama-160m | 7.72 | 15.75 | -0.31 |
| GlorIA-1.3B | 5.93 | 27.27 | -15.42 |
<details>
<summary><b>Evaluation Suite</b></summary>
| **Benchmark** | **n-shot** | **Type** | **Baseline** | **Metric** |
| --------------- | ---------- | ------------------ | ------------ | ---------- |
| **Easy Set** | | | | |
| CALAME | 5-shot | Completion | 0 | `acc` |
| GlobalPIQA | 5-shot | Completion | 50 | `acc_norm` |
| LAMBADA | 5-shot | Completion | 0 | `acc` |
| ARC-Challenge | 5-shot | MC-Q&A | 25 | `acc_norm` |
| HellaSwag | 5-shot | Completion | 25 | `acc_norm` |
| **Hard Set** | | | | |
| ENEM | 3-shot | MC-Q&A | 20 | `acc` |
| BLUEX | 3-shot | MC-Q&A | 22.5 | `acc` |
| OAB Exams | 3-shot | MC-Q&A | 25 | `acc` |
| BELEBELE | 5-shot | MC-Q&A | 25 | `acc_norm` |
| MMLU | 5-shot | MC-Q&A | 25 | `acc` |
</details>
<details>
<summary><b>Individual Benchmarks</b></summary>
| | BLUEX | ENEM | OAB | ARC Challenge | BELEBELE | CALAME | Global PIQA | HellaSwag | LAMBADA | MMLU |
| -------------------------- | ----- | ----- | ----- | ------------- | -------- | ------ | ----------- | --------- | ------- | ----- |
| **Tucano2-qwen-3.7B-Base** | 66.2 | 77.54 | 58.45 | 57.78 | 83.67 | 61.08 | 83 | 65.32 | 62.53 | 65.4 |
| Qwen2.5-7B | 65.92 | 75.02 | 55.03 | 54.19 | 89.67 | 58.96 | 78 | 67.92 | 59.52 | 68.55 |
| Qwen3-4B-Base | 69.96 | 77.61 | 55.58 | 54.53 | 87.89 | 57.95 | 77 | 63.19 | 60.37 | 68.59 |
| SmolLM3-3B-Base | 54.52 | 61.37 | 45.51 | 51.37 | 77.67 | 59.15 | 81 | 65.57 | 59.89 | 56.19 |
| Qwen2.5-3B | 58.28 | 67.32 | 50.34 | 45.21 | 83.22 | 58.38 | 75 | 59.44 | 57.17 | 59.79 |
| **Tucano2-qwen-1.5B-Base** | 55.91 | 68.72 | 48.29 | 48.21 | 74 | 59.06 | 77 | 56.25 | 54.2 | 54.04 |
| Curio-edu-7b | 47.15 | 58.64 | 43.78 | 50.94 | 53 | 60.79 | 86 | 66.48 | 64.62 | 45.14 |
| Qwen3-1.7B-Base | 57.16 | 65.22 | 45.79 | 47.18 | 77.89 | 53.56 | 67 | 52.55 | 50.81 | 55.49 |
| Curio-7b | 43.39 | 50.59 | 39.68 | 48.03 | 45.33 | 63.44 | 89 | 67.58 | 65.94 | 40.83 |
| Llama-3.2-3B | 50.35 | 53.04 | 39.45 | 41.11 | 68.89 | 54.48 | 69 | 59.14 | 59.48 | 48.28 |
| granite-3.3-2b-base | 45.34 | 54.02 | 39.54 | 41.37 | 65.67 | 58.77 | 70 | 60.81 | 58.22 | 45.63 |
| **Tucano2-qwen-0.5B-Base** | 46.87 | 55.14 | 40.36 | 37.44 | 53.89 | 58.67 | 74 | 48.43 | 45.14 | 39.68 |
| Qwen3-0.6B-Base | 42.98 | 49.48 | 40.46 | 36.92 | 65 | 45.95 | 54 | 40.33 | 41.78 | 43.54 |
| Llama-2-7b-hf | 31.29 | 31.77 | 35.49 | 42.14 | 41.44 | 54.53 | 67 | 56.76 | 59.73 | 38.64 |
| **Tucano2-0.6B-Base** | 21.14 | 23.58 | 23.28 | 37.01 | 26.22 | 57.61 | 79 | 47.74 | 39.45 | 27.18 |
| Qwen2.5-0.5B | 32.55 | 38.91 | 35.9 | 28.46 | 49.56 | 44.89 | 44 | 37.7 | 39.08 | 41.17 |
| Curio-1.1b | 21.56 | 21.06 | 23.1 | 30.43 | 22.89 | 59.25 | 75 | 49.45 | 46.69 | 26.35 |
| Tucano-2b4 | 25.45 | 21.62 | 26.74 | 30.43 | 25.89 | 50.34 | 73 | 48.85 | 32.39 | 26.24 |
| Curio-edu-1b1 | 23.5 | 19.87 | 25.01 | 32.22 | 26.22 | 54.91 | 69 | 46.3 | 42.93 | 25.43 |
| Llama-3.2-1B | 24.06 | 23.93 | 26.06 | 31.71 | 33.33 | 50 | 55 | 45.27 | 45.6 | 28.51 |
| Tucano-1b1 | 25.45 | 21.55 | 26.38 | 30.09 | 25.67 | 48.94 | 68 | 44.1 | 28.43 | 25.26 |
| Tucano-630m | 26.7 | 21.69 | 26.92 | 28.72 | 27.33 | 47.3 | 68 | 40.37 | 26.2 | 25.6 |
| Carvalho_pt-gl-1.3B | 19.33 | 18.12 | 22.32 | 27.01 | 26.44 | 53.42 | 63 | 38.53 | 33.59 | 24.82 |
| TeenyTinyLlama-460m | 25.87 | 20.15 | 27.02 | 27.35 | 28.11 | 42.49 | 59 | 34.81 | 21.56 | 26.65 |
| Tucano-160m | 24.76 | 20.57 | 17.22 | 25.56 | 23.44 | 43.59 | 59 | 33.73 | 21.64 | 25.77 |
| TeenyTinyLlama-160m | 22.53 | 18.89 | 22.32 | 24.02 | 26.78 | 39.79 | 58 | 29.89 | 17.74 | 25.74 |
| GlorIA-1.3B | 4.31 | 2.52 | 4.69 | 26.41 | 22.78 | 54.67 | 64 | 36.35 | 36.68 | 23.69 |
</details>
## Performance and Compute
Below, we display the performance of Tucano2-qwen-0.5B-Base across all benchmarks in our evaluation suite. Tucano2-qwen-0.5B-Base is compared with Qwen3-0.6B-Base, the base model from which it was continually pretrained. The percentage variation in performance is displayed in terms of the difference in evaluation scores between the Base and the Continually Pretrained model.
All individual benchmark scores and their evolution across training time can be found in the [.plots](https://huggingface.co/Polygl0t/Tucano2-qwen-0.5B-Base/tree/main/.plots/) folder.
**Before and After Continual Pretraining**
![Performance Before and After Continual Pretraining](./.plots/before_and_after.png)
The plot below compares the compute requirements (measured as C = 6 \* N \* D, where N is the number of parameters and D is the number of tokens processed) against the performance of each model (measured by the NPM score).
![NPM vs Compute](./.plots/performance_vs_compute.png)
<details>
<summary><b>Performance and Compute Details</b></summary>
| | Parameters (B) | Pretraining Tokens (B) | Continual Pretraining Tokens (B) | Total Tokens (B) | Pretraining Compute (FLOPs) | Continual Pretraining Compute (FLOPs) | Total Compute (FLOPs) | NPM Score |
|----------------------------|----------------|------------------------|----------------------------------|------------------|-----------------------------|---------------------------------------|-----------------------|-----------|
| **Tucano2-qwen-3.7B-Base** | 3.7 | 36000 | 50 | 36050 | 8.64e+23 | 1.11e+21 | 8.65e+23 | 59.2 |
| Qwen2.5-7B | 7 | 18000 | - | 18000 | 7.56e+23 | - | 7.56e+23 | 57.97 |
| Qwen3-4B-Base | 4 | 36000 | - | 36000 | 8.64e+23 | - | 8.64e+23 | 57.86 |
| SmolLM3-3B-Base | 3 | 11200 | - | 11200 | 2.02e+23 | - | 2.02e+23 | 50.25 |
| Qwen2.5-3B | 3 | 18000 | - | 18000 | 3.24e+23 | - | 3.24e+23 | 50.15 |
| **Tucano2-qwen-1.5B-Base** | 1.5 | 36000 | 100 | 36100 | 3.67e+23 | 9e+20 | 3.68e+23 | 47.89 |
| Curio-edu-7b | 7 | 2000 | 20 | 2020 | 8.4e+22 | 8.4e+20 | 8.48e+22 | 45.66 |
| Qwen3-1.7B-Base | 1.7 | 36000 | - | 36000 | 3.67e+23 | - | 3.67e+23 | 44.48 |
| Curio-7b | 7 | 2000 | 150 | 2150 | 8.4e+22 | 6.3e+21 | 9.03e+22 | 42.78 |
| Llama-3.2-3B | 3 | 9000 | - | 9000 | 1.62e+23 | - | 1.62e+23 | 40.5 |
| granite-3.3-2b-base | 2 | 12000 | - | 12000 | 1.44e+23 | - | 1.44e+23 | 39.96 |
| **Tucano2-qwen-0.5B-Base** | 0.5 | 36000 | 50 | 36050 | 1.3e+23 | 1.5e+20 | 1.3e+23 | 35.35 |
| Qwen3-0.6B-Base | 0.6 | 36000 | - | 36000 | 1.3e+23 | - | 1.3e+23 | 29.39 |
| Llama-2-7b-hf | 7 | 2000 | - | 2000 | 8.4e+22 | - | 8.4e+22 | 29.36 |
| **Tucano2-0.6B-Base** | 0.6 | 408 | - | 408 | 1.47e+21 | - | 1.47e+21 | 20.63 |
| Qwen2.5-0.5B | 0.5 | 18000 | - | 18000 | 5.4e+22 | - | 5.4e+22 | 19.89 |
| Curio-1.1b | 1.1 | 1000 | 150 | 1150 | 6.6e+21 | 9.9e+20 | 7.59e+21 | 19.23 |
| Tucano-2b4 | 2.4 | 515 | - | 515 | 7.42e+21 | - | 7.42e+21 | 17.87 |
| Curio-edu-1b1 | 1.1 | 1000 | 20 | 1020 | 6.6e+21 | 1.32e+20 | 6.73e+21 | 17.72 |
| Llama-3.2-1B | 1 | 9000 | - | 9000 | 5.4e+22 | - | 5.4e+22 | 16.57 |
| Tucano-1b1 | 1.1 | 250 | - | 250 | 1.65e+21 | - | 1.65e+21 | 15.44 |
| Tucano-630m | 0.63 | 211 | - | 211 | 7.98e+20 | - | 7.98e+20 | 14.89 |
| Carvalho_pt-gl-1.3B | 1.3 | 26 | 5 | 31 | 2.03e+20 | 3.9e+19 | 2.42e+20 | 12.54 |
| TeenyTinyLlama-460m | 0.46 | 6.2 | - | 6.2 | 1.71e+19 | - | 1.71e+19 | 11.18 |
| Tucano-160m | 0.16 | 169 | - | 169 | 1.62e+20 | - | 1.62e+20 | 8.78 |
| TeenyTinyLlama-160m | 0.16 | 6.2 | - | 6.2 | 5.95e+18 | - | 5.95e+18 | 7.71 |
| GlorIA-1.3B | 1.3 | 35 | - | 35 | 2.73e+20 | - | 2.73e+20 | 5.92 |
</details>
## Cite as 🤗
```bibtex
@misc{correa2026tucano2cool,
title={{Tucano 2 Cool: Better Open Source LLMs for Portuguese}},
author={Nicholas Kluge Corr{\^e}a and Aniket Sen and Shiza Fatimah and Sophia Falk and Lennard Landgraf and Julia Kastner and Lucie Flek},
year={2026},
eprint={2603.03543},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2603.03543},
}
```
## Acknowledgments
Polyglot is a project funded by the Federal Ministry of Education and Research (BMBF) and the Ministry of Culture and Science of the State of North Rhine-Westphalia (MWK) as part of TRA Sustainable Futures (University of Bonn) and the Excellence Strategy of the federal and state governments.
We also gratefully acknowledge the access granted to the [Marvin cluster](https://www.hpc.uni-bonn.de/en/systems/marvin) hosted by the [University of Bonn](https://www.uni-bonn.de/en), along with the support provided by its High Performance Computing & Analytics Lab.
## License
Tucano2-qwen-0.5B-Base is licensed under the Apache License, Version 2.0. For more details, see the [LICENSE](LICENSE) file.

62
config.json Normal file
View File

@@ -0,0 +1,62 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"dtype": "bfloat16",
"eos_token_id": 2,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 1024,
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 4096,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 49109,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000,
"sliding_window": null,
"tie_word_embeddings": true,
"torch_dtype": "bfloat16",
"transformers_version": "4.53.2",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 49152
}

23
emissions.csv Normal file
View File

@@ -0,0 +1,23 @@
timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
2025-12-23T19:21:46,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,498.7068142257631,0.0992674230448318,0.0001990496624734,45.01167615000001,398.1931339192689,70.0,0.0060186035058234,0.2452029375510278,0.0093570730521705,0.2605786141090218,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-23T22:10:03,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,10595.473367678002,2.161503919738504,0.0002040025815488,45.011090439,403.7976910082256,70.0,0.1279630857588154,5.347085211831427,0.1989349541186509,5.673983251708896,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-24T01:05:21,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,21113.948883658275,4.311192573704499,0.0002041869380976,45.023994579375,944.4281598953607,70.0,0.2550055074963965,10.665507558510283,0.3964381709128567,11.31695123691954,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-24T04:00:39,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,31631.96770196408,6.460172577464265,0.000204229235384,45.0185716395,1115.3354135257653,70.0,0.382042096003632,15.982084734546447,0.5939321731623889,16.958059003712467,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-24T06:55:59,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,42151.54401289113,8.609458274858108,0.0002042501283517,45.01763225357144,1373.9160761454668,70.0,0.5091005723478386,21.29940821673512,0.7914604322271686,22.599969221310165,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-24T09:51:19,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,52671.232499394566,10.761742638095242,0.000204319172486,45.01066172625001,389.79964354842514,70.0,0.6361519924957892,26.624621368291173,0.9889776251566664,28.24975098594368,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-24T12:46:37,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,63189.4659346398,12.918288746941286,0.0002044373782222,45.02837354785714,726.7344350816131,70.0,0.7631901183161606,31.961055698268467,1.1864740861355978,33.91071990272027,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-24T15:41:56,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,73708.00911874697,15.074884482662329,0.0002045216613892,45.0199931085,1012.7033155334932,70.0,0.8902340814154672,37.29760521250461,1.3839797969378391,39.57181909085793,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-24T18:37:16,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,84228.92013456859,17.224720777664402,0.0002044988912376,45.020015251875,1395.8595638796028,70.0,1.0173064822195157,42.616057198374,1.5818109556170628,45.21517463621053,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-24T21:32:37,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,94749.86746122874,19.3738614661473,0.0002044737579614,45.02684444142857,468.36104777016834,70.0,1.144376972327447,47.93296970884506,1.7793575218654842,50.85670420303793,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-25T00:27:59,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,105271.0848454833,21.522790542507483,0.0002044511137516,45.0188224425,1240.6741071835147,70.0,1.2714529576795155,53.24931260219376,1.9769127246981053,56.49767828457142,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-25T03:23:19,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,115791.69347525388,23.67106977532321,0.0002044280471671,45.007569585000006,397.05607898261405,70.0,1.3985155428854574,58.56398341031587,2.174447562937777,62.13694651613917,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-25T06:18:39,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,126311.77048727496,25.818846208351,0.0002044057027207,45.02091609000001,556.9667245365437,70.0,1.525579168730944,63.87733187043568,2.3719838508476974,67.77489489001444,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-25T09:14:00,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,136832.0669031553,27.96662278255894,0.0002043864674086,45.016275768750006,1161.0709139911548,70.0,1.652644048411428,69.1906776613867,2.569521924691652,73.41284363448993,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-25T12:09:18,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,147350.80217676982,30.120927635009604,0.0002044164482991,45.015666102,1431.8912506906654,70.0,1.7796889506003104,74.52121140331019,2.767028862547157,79.06792921645781,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-25T15:04:39,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,157870.99625565112,32.26960138544078,0.0002044048758214,45.011602845000006,400.8619258511732,70.0,1.9067452848313229,79.83693430171849,2.964553476688838,84.7082330632387,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-25T17:59:58,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,168390.8320274204,34.42368023535139,0.0002044272827736,45.01676270700001,984.0971753069672,70.0,2.0338051671413777,85.16683650341457,3.1620837142750435,90.36272538483104,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-25T20:55:18,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,178910.99009050243,36.57383411976997,0.0002044247483134,45.02196850846154,1350.6654466470088,70.0,2.1608678732700364,90.4864288143628,3.359617919979614,96.00691460761246,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-25T23:50:36,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,189428.0116337631,38.72195476815728,0.0002044151465994,45.01717103812501,1441.2611347140407,70.0,2.2878940351845776,95.80049762950348,3.557374887765809,101.6457665524538,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-26T02:45:54,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,199946.77345186652,40.86993997999653,0.0002044040985229,45.013903326000005,406.9675080369053,70.0,2.4149373430014225,101.11444672149264,3.7548789096403903,107.28426297413448,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-26T05:41:11,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,210463.3381888047,43.01805359410509,0.0002043968985967,45.02427999300001,431.2225532143119,70.0,2.541956054535585,106.42879529046807,3.952345108875106,112.9230964538787,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2025-12-26T05:41:16,Polyglot,3e859185-465d-43ba-9ab0-7701d799406e,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,210468.0922362823,43.01892001240934,0.0002043963983106479,45.024051262499995,1633.6575673245372,70.0,2.5420087603607517,106.43093500495763,3.9524270508238026,112.9253708161421,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.4,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
1 timestamp project_name run_id experiment_id duration emissions emissions_rate cpu_power gpu_power ram_power cpu_energy gpu_energy ram_energy energy_consumed country_name country_iso_code region cloud_provider cloud_region os python_version codecarbon_version cpu_count cpu_model gpu_count gpu_model longitude latitude ram_total_size tracking_mode on_cloud pue
2 2025-12-23T19:21:46 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 498.7068142257631 0.0992674230448318 0.0001990496624734 45.01167615000001 398.1931339192689 70.0 0.0060186035058234 0.2452029375510278 0.0093570730521705 0.2605786141090218 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
3 2025-12-23T22:10:03 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 10595.473367678002 2.161503919738504 0.0002040025815488 45.011090439 403.7976910082256 70.0 0.1279630857588154 5.347085211831427 0.1989349541186509 5.673983251708896 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
4 2025-12-24T01:05:21 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 21113.948883658275 4.311192573704499 0.0002041869380976 45.023994579375 944.4281598953607 70.0 0.2550055074963965 10.665507558510283 0.3964381709128567 11.31695123691954 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
5 2025-12-24T04:00:39 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 31631.96770196408 6.460172577464265 0.000204229235384 45.0185716395 1115.3354135257653 70.0 0.382042096003632 15.982084734546447 0.5939321731623889 16.958059003712467 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
6 2025-12-24T06:55:59 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 42151.54401289113 8.609458274858108 0.0002042501283517 45.01763225357144 1373.9160761454668 70.0 0.5091005723478386 21.29940821673512 0.7914604322271686 22.599969221310165 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
7 2025-12-24T09:51:19 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 52671.232499394566 10.761742638095242 0.000204319172486 45.01066172625001 389.79964354842514 70.0 0.6361519924957892 26.624621368291173 0.9889776251566664 28.24975098594368 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
8 2025-12-24T12:46:37 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 63189.4659346398 12.918288746941286 0.0002044373782222 45.02837354785714 726.7344350816131 70.0 0.7631901183161606 31.961055698268467 1.1864740861355978 33.91071990272027 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
9 2025-12-24T15:41:56 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 73708.00911874697 15.074884482662329 0.0002045216613892 45.0199931085 1012.7033155334932 70.0 0.8902340814154672 37.29760521250461 1.3839797969378391 39.57181909085793 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
10 2025-12-24T18:37:16 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 84228.92013456859 17.224720777664402 0.0002044988912376 45.020015251875 1395.8595638796028 70.0 1.0173064822195157 42.616057198374 1.5818109556170628 45.21517463621053 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
11 2025-12-24T21:32:37 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 94749.86746122874 19.3738614661473 0.0002044737579614 45.02684444142857 468.36104777016834 70.0 1.144376972327447 47.93296970884506 1.7793575218654842 50.85670420303793 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
12 2025-12-25T00:27:59 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 105271.0848454833 21.522790542507483 0.0002044511137516 45.0188224425 1240.6741071835147 70.0 1.2714529576795155 53.24931260219376 1.9769127246981053 56.49767828457142 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
13 2025-12-25T03:23:19 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 115791.69347525388 23.67106977532321 0.0002044280471671 45.007569585000006 397.05607898261405 70.0 1.3985155428854574 58.56398341031587 2.174447562937777 62.13694651613917 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
14 2025-12-25T06:18:39 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 126311.77048727496 25.818846208351 0.0002044057027207 45.02091609000001 556.9667245365437 70.0 1.525579168730944 63.87733187043568 2.3719838508476974 67.77489489001444 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
15 2025-12-25T09:14:00 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 136832.0669031553 27.96662278255894 0.0002043864674086 45.016275768750006 1161.0709139911548 70.0 1.652644048411428 69.1906776613867 2.569521924691652 73.41284363448993 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
16 2025-12-25T12:09:18 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 147350.80217676982 30.120927635009604 0.0002044164482991 45.015666102 1431.8912506906654 70.0 1.7796889506003104 74.52121140331019 2.767028862547157 79.06792921645781 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
17 2025-12-25T15:04:39 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 157870.99625565112 32.26960138544078 0.0002044048758214 45.011602845000006 400.8619258511732 70.0 1.9067452848313229 79.83693430171849 2.964553476688838 84.7082330632387 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
18 2025-12-25T17:59:58 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 168390.8320274204 34.42368023535139 0.0002044272827736 45.01676270700001 984.0971753069672 70.0 2.0338051671413777 85.16683650341457 3.1620837142750435 90.36272538483104 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
19 2025-12-25T20:55:18 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 178910.99009050243 36.57383411976997 0.0002044247483134 45.02196850846154 1350.6654466470088 70.0 2.1608678732700364 90.4864288143628 3.359617919979614 96.00691460761246 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
20 2025-12-25T23:50:36 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 189428.0116337631 38.72195476815728 0.0002044151465994 45.01717103812501 1441.2611347140407 70.0 2.2878940351845776 95.80049762950348 3.557374887765809 101.6457665524538 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
21 2025-12-26T02:45:54 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 199946.77345186652 40.86993997999653 0.0002044040985229 45.013903326000005 406.9675080369053 70.0 2.4149373430014225 101.11444672149264 3.7548789096403903 107.28426297413448 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
22 2025-12-26T05:41:11 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 210463.3381888047 43.01805359410509 0.0002043968985967 45.02427999300001 431.2225532143119 70.0 2.541956054535585 106.42879529046807 3.952345108875106 112.9230964538787 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
23 2025-12-26T05:41:16 Polyglot 3e859185-465d-43ba-9ab0-7701d799406e 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 210468.0922362823 43.01892001240934 0.0002043963983106479 45.024051262499995 1633.6575673245372 70.0 2.5420087603607517 106.43093500495763 3.9524270508238026 112.9253708161421 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.4 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0

190
evals.yaml Normal file
View File

@@ -0,0 +1,190 @@
evaluations:
arc_challenge_poly_pt_acc: 0.32905982905982906
arc_challenge_poly_pt_acc_norm: 0.37435897435897436
arc_challenge_poly_pt_acc_norm_stderr: 0.014154661190814505
arc_challenge_poly_pt_acc_stderr: 0.013742700308521677
arc_challenge_poly_pt_alias: arc_challenge_poly_pt
assin2_rte_acc,all: 0.610702614379085
assin2_rte_acc_stderr,all: 0.006976455542871003
assin2_rte_alias: assin2_rte
assin2_rte_f1_macro,all: 0.5493894012220668
assin2_rte_f1_macro_stderr,all: 0.007313213532417135
assin2_sts_alias: assin2_sts
assin2_sts_mse,all: 1.954166666666667
assin2_sts_mse_stderr,all: N/A
assin2_sts_pearson,all: 0.06429110147600466
assin2_sts_pearson_stderr,all: 0.013460910678684006
assin_entailment_acc: 0.70675
assin_entailment_acc_stderr: 0.007199067024031941
assin_entailment_alias: assin_entailment
assin_paraphrase_acc: 0.72675
assin_paraphrase_acc_stderr: 0.007046880444991351
assin_paraphrase_alias: assin_paraphrase
belebele_por_Latn_acc: 0.5388888888888889
belebele_por_Latn_acc_norm: 0.5388888888888889
belebele_por_Latn_acc_norm_stderr: 0.016625417583086437
belebele_por_Latn_acc_stderr: 0.016625417583086437
belebele_por_Latn_alias: belebele_por_Latn
bluex_acc,all: 0.46870653685674546
bluex_acc,exam_id__UNICAMP_2018: 0.46296296296296297
bluex_acc,exam_id__UNICAMP_2019: 0.46
bluex_acc,exam_id__UNICAMP_2020: 0.5636363636363636
bluex_acc,exam_id__UNICAMP_2021_1: 0.3695652173913043
bluex_acc,exam_id__UNICAMP_2021_2: 0.37254901960784315
bluex_acc,exam_id__UNICAMP_2022: 0.5641025641025641
bluex_acc,exam_id__UNICAMP_2023: 0.6046511627906976
bluex_acc,exam_id__UNICAMP_2024: 0.5777777777777777
bluex_acc,exam_id__USP_2018: 0.37037037037037035
bluex_acc,exam_id__USP_2019: 0.475
bluex_acc,exam_id__USP_2020: 0.4642857142857143
bluex_acc,exam_id__USP_2021: 0.34615384615384615
bluex_acc,exam_id__USP_2022: 0.3877551020408163
bluex_acc,exam_id__USP_2023: 0.5227272727272727
bluex_acc,exam_id__USP_2024: 0.5609756097560976
bluex_acc_stderr,all: 0.010751763481122825
bluex_acc_stderr,exam_id__UNICAMP_2018: 0.03908140507987583
bluex_acc_stderr,exam_id__UNICAMP_2019: 0.04086284322684593
bluex_acc_stderr,exam_id__UNICAMP_2020: 0.03854268045176616
bluex_acc_stderr,exam_id__UNICAMP_2021_1: 0.041049198501562786
bluex_acc_stderr,exam_id__UNICAMP_2021_2: 0.03903627297829125
bluex_acc_stderr,exam_id__UNICAMP_2022: 0.046067534061712136
bluex_acc_stderr,exam_id__UNICAMP_2023: 0.043110276875963825
bluex_acc_stderr,exam_id__UNICAMP_2024: 0.04235722643358554
bluex_acc_stderr,exam_id__USP_2018: 0.03788178159383037
bluex_acc_stderr,exam_id__USP_2019: 0.04557332896428892
bluex_acc_stderr,exam_id__USP_2020: 0.038341658891473986
bluex_acc_stderr,exam_id__USP_2021: 0.038139467533341624
bluex_acc_stderr,exam_id__USP_2022: 0.040012931518371306
bluex_acc_stderr,exam_id__USP_2023: 0.043422447984322925
bluex_acc_stderr,exam_id__USP_2024: 0.044825815810456374
bluex_alias: bluex
calame_pt_acc: 0.5867052023121387
calame_pt_acc_stderr: 0.01081012929476997
calame_pt_alias: calame_pt
calame_pt_perplexity: 7.2359833214160085
calame_pt_perplexity_stderr: 0.42482617041660564
enem_challenge_acc,all: 0.5514345696291113
enem_challenge_acc,exam_id__2009: 0.5043478260869565
enem_challenge_acc,exam_id__2010: 0.5726495726495726
enem_challenge_acc,exam_id__2011: 0.6153846153846154
enem_challenge_acc,exam_id__2012: 0.5344827586206896
enem_challenge_acc,exam_id__2013: 0.5833333333333334
enem_challenge_acc,exam_id__2014: 0.5688073394495413
enem_challenge_acc,exam_id__2015: 0.4789915966386555
enem_challenge_acc,exam_id__2016: 0.5371900826446281
enem_challenge_acc,exam_id__2016_2: 0.5284552845528455
enem_challenge_acc,exam_id__2017: 0.5086206896551724
enem_challenge_acc,exam_id__2022: 0.5639097744360902
enem_challenge_acc,exam_id__2023: 0.6148148148148148
enem_challenge_acc_stderr,all: 0.007618623435000518
enem_challenge_acc_stderr,exam_id__2009: 0.026949045711548218
enem_challenge_acc_stderr,exam_id__2010: 0.026429279577735547
enem_challenge_acc_stderr,exam_id__2011: 0.02589822342752356
enem_challenge_acc_stderr,exam_id__2012: 0.02669879193984667
enem_challenge_acc_stderr,exam_id__2013: 0.02740882906544383
enem_challenge_acc_stderr,exam_id__2014: 0.027336979010138398
enem_challenge_acc_stderr,exam_id__2015: 0.026385065690201895
enem_challenge_acc_stderr,exam_id__2016: 0.026088144599797948
enem_challenge_acc_stderr,exam_id__2016_2: 0.02606861159558674
enem_challenge_acc_stderr,exam_id__2017: 0.02684138658677631
enem_challenge_acc_stderr,exam_id__2022: 0.02485425227418429
enem_challenge_acc_stderr,exam_id__2023: 0.024259924778206914
enem_challenge_alias: enem
faquad_nli_acc,all: 0.7846153846153846
faquad_nli_acc_stderr,all: 0.011396120309131366
faquad_nli_alias: faquad_nli
faquad_nli_f1_macro,all: 0.4396551724137931
faquad_nli_f1_macro_stderr,all: 0.00357969847290883
global_piqa_completions_por_latn_braz_acc: 0.79
global_piqa_completions_por_latn_braz_acc_bytes: 0.75
global_piqa_completions_por_latn_braz_acc_bytes_stderr: 0.04351941398892446
global_piqa_completions_por_latn_braz_acc_norm: 0.74
global_piqa_completions_por_latn_braz_acc_norm_stderr: 0.0440844002276808
global_piqa_completions_por_latn_braz_acc_stderr: 0.040936018074033236
global_piqa_completions_por_latn_braz_alias: global_piqa_completions_por_latn_braz
hatebr_offensive_acc,all: 0.7964285714285714
hatebr_offensive_acc_stderr,all: 0.007613133172324561
hatebr_offensive_alias: hatebr_offensive_binary
hatebr_offensive_f1_macro,all: 0.7963528271484649
hatebr_offensive_f1_macro_stderr,all: 0.00761763073122653
hellaswag_poly_pt_acc: 0.3776140426915159
hellaswag_poly_pt_acc_norm: 0.48434283237620546
hellaswag_poly_pt_acc_norm_stderr: 0.005202393220555622
hellaswag_poly_pt_acc_stderr: 0.005046614926940191
hellaswag_poly_pt_alias: hellaswag_poly_pt
lambada_poly_pt_acc: 0.4513875412381137
lambada_poly_pt_acc_stderr: 0.006932975888368315
lambada_poly_pt_alias: lambada_poly_pt
lambada_poly_pt_perplexity: 15.604144991512424
lambada_poly_pt_perplexity_stderr: 0.5333901055397442
mmlu_poly_pt_acc: 0.39680276193335334
mmlu_poly_pt_acc_stderr: 0.0042385372220186505
mmlu_poly_pt_alias: mmlu_poly_pt
oab_exams_acc,all: 0.40364464692482915
oab_exams_acc,exam_id__2010-01: 0.4
oab_exams_acc,exam_id__2010-02: 0.4
oab_exams_acc,exam_id__2011-03: 0.3434343434343434
oab_exams_acc,exam_id__2011-04: 0.4125
oab_exams_acc,exam_id__2011-05: 0.425
oab_exams_acc,exam_id__2012-06: 0.375
oab_exams_acc,exam_id__2012-06a: 0.475
oab_exams_acc,exam_id__2012-07: 0.3125
oab_exams_acc,exam_id__2012-08: 0.425
oab_exams_acc,exam_id__2012-09: 0.3246753246753247
oab_exams_acc,exam_id__2013-10: 0.425
oab_exams_acc,exam_id__2013-11: 0.4375
oab_exams_acc,exam_id__2013-12: 0.4375
oab_exams_acc,exam_id__2014-13: 0.425
oab_exams_acc,exam_id__2014-14: 0.4125
oab_exams_acc,exam_id__2014-15: 0.41025641025641024
oab_exams_acc,exam_id__2015-16: 0.35
oab_exams_acc,exam_id__2015-17: 0.48717948717948717
oab_exams_acc,exam_id__2015-18: 0.3375
oab_exams_acc,exam_id__2016-19: 0.44871794871794873
oab_exams_acc,exam_id__2016-20: 0.3875
oab_exams_acc,exam_id__2016-20a: 0.3625
oab_exams_acc,exam_id__2016-21: 0.425
oab_exams_acc,exam_id__2017-22: 0.425
oab_exams_acc,exam_id__2017-23: 0.425
oab_exams_acc,exam_id__2017-24: 0.3875
oab_exams_acc,exam_id__2018-25: 0.4375
oab_exams_acc_stderr,all: 0.006053332133276562
oab_exams_acc_stderr,exam_id__2010-01: 0.030610161516106096
oab_exams_acc_stderr,exam_id__2010-02: 0.028296630437280906
oab_exams_acc_stderr,exam_id__2011-03: 0.027549621066797744
oab_exams_acc_stderr,exam_id__2011-04: 0.031730143003018005
oab_exams_acc_stderr,exam_id__2011-05: 0.03187425289992289
oab_exams_acc_stderr,exam_id__2012-06: 0.031303363618806604
oab_exams_acc_stderr,exam_id__2012-06a: 0.03204492958151872
oab_exams_acc_stderr,exam_id__2012-07: 0.030024899757931458
oab_exams_acc_stderr,exam_id__2012-08: 0.03186228006495766
oab_exams_acc_stderr,exam_id__2012-09: 0.030794179748498546
oab_exams_acc_stderr,exam_id__2013-10: 0.03186391626480471
oab_exams_acc_stderr,exam_id__2013-11: 0.03202492621379354
oab_exams_acc_stderr,exam_id__2013-12: 0.03195252134301974
oab_exams_acc_stderr,exam_id__2014-13: 0.03180998638392341
oab_exams_acc_stderr,exam_id__2014-14: 0.03183014975650971
oab_exams_acc_stderr,exam_id__2014-15: 0.03207677106125773
oab_exams_acc_stderr,exam_id__2015-16: 0.030649958521189145
oab_exams_acc_stderr,exam_id__2015-17: 0.03266206847243709
oab_exams_acc_stderr,exam_id__2015-18: 0.030453791802761587
oab_exams_acc_stderr,exam_id__2016-19: 0.03245421470850268
oab_exams_acc_stderr,exam_id__2016-20: 0.03147299166016239
oab_exams_acc_stderr,exam_id__2016-20a: 0.031016128861774545
oab_exams_acc_stderr,exam_id__2016-21: 0.03173265714315922
oab_exams_acc_stderr,exam_id__2017-22: 0.03191749762684417
oab_exams_acc_stderr,exam_id__2017-23: 0.0320020087581464
oab_exams_acc_stderr,exam_id__2017-24: 0.031539234776498976
oab_exams_acc_stderr,exam_id__2018-25: 0.03202176928915031
oab_exams_alias: oab_exams
portuguese_hate_speech_acc,all: 0.43478260869565216
portuguese_hate_speech_acc_stderr,all: 0.012048917796997311
portuguese_hate_speech_alias: portuguese_hate_speech_binary
portuguese_hate_speech_f1_macro,all: 0.42838929145469706
portuguese_hate_speech_f1_macro_stderr,all: 0.011986624066160297
tweetsentbr_acc,all: 0.4736318407960199
tweetsentbr_acc_stderr,all: 0.007893686144300348
tweetsentbr_alias: tweetsentbr
tweetsentbr_f1_macro,all: 0.2678826295357763
tweetsentbr_f1_macro_stderr,all: 0.005779014212286808
step: 50000

22
evals_all_steps.csv Normal file

File diff suppressed because one or more lines are too long

27
evals_for_comparison.csv Normal file

File diff suppressed because one or more lines are too long

14
generation_config.json Normal file
View File

@@ -0,0 +1,14 @@
{
"bos_token_id": 1,
"eos_token_id": 2,
"pad_token_id": 49109,
"transformers_version": "4.53.2",
"do_sample": true,
"max_new_tokens": 1024,
"renormalize_logits": true,
"repetition_penalty": 1.2,
"temperature": 0.1,
"top_k": 50,
"top_p": 1.0,
"use_cache": false
}

3
logo.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1856d91c3b35390cee5122902d94044657c67df7034ca4005316275c404fc8a0
size 197189

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3659ecd99edb6961a5decc610f52503fa4b2d0376c738fb3086ef005d7f89217
size 981633328

82
ruler.yaml Normal file
View File

@@ -0,0 +1,82 @@
model_name: Tucano2-qwen-0.5B-Base
results:
niah_pt_multikey_1_1024: 0.376
niah_pt_multikey_1_1024_stderr: 0.02168382753928621
niah_pt_multikey_1_2048: 0.386
niah_pt_multikey_1_2048_stderr: 0.021793529219281196
niah_pt_multikey_1_4096: 0.356
niah_pt_multikey_1_4096_stderr: N/A
niah_pt_multikey_1_alias: " - niah_pt_multikey_1"
niah_pt_multikey_2_1024: 0.2
niah_pt_multikey_2_1024_stderr: 0.01790645924143381
niah_pt_multikey_2_2048: 0.086
niah_pt_multikey_2_2048_stderr: 0.012550818542716023
niah_pt_multikey_2_4096: 0.024
niah_pt_multikey_2_4096_stderr: N/A
niah_pt_multikey_2_alias: " - niah_pt_multikey_2"
niah_pt_multikey_3_1024: 0.236
niah_pt_multikey_3_1024_stderr: 0.01900869962208476
niah_pt_multikey_3_2048: 0.09
niah_pt_multikey_3_2048_stderr: 0.012811255071733842
niah_pt_multikey_3_4096: 0.044
niah_pt_multikey_3_4096_stderr: N/A
niah_pt_multikey_3_alias: " - niah_pt_multikey_3"
niah_pt_multiquery_1024: 0.1405
niah_pt_multiquery_1024_stderr: 0.010839949677836887
niah_pt_multiquery_2048: 0.121
niah_pt_multiquery_2048_stderr: 0.009920424872923468
niah_pt_multiquery_4096: 0.042
niah_pt_multiquery_4096_stderr: N/A
niah_pt_multiquery_alias: " - niah_pt_multiquery"
niah_pt_multivalue_1024: 0.2165
niah_pt_multivalue_1024_stderr: 0.012341398633885907
niah_pt_multivalue_2048: 0.1295
niah_pt_multivalue_2048_stderr: 0.010243014140249671
niah_pt_multivalue_4096: 0.0525
niah_pt_multivalue_4096_stderr: N/A
niah_pt_multivalue_alias: " - niah_pt_multivalue"
niah_pt_single_1_1024: 0.236
niah_pt_single_1_1024_stderr: 0.01900869962208476
niah_pt_single_1_2048: 0.228
niah_pt_single_1_2048_stderr: 0.018781306529363172
niah_pt_single_1_4096: 0.172
niah_pt_single_1_4096_stderr: N/A
niah_pt_single_1_alias: " - niah_pt_single_1"
niah_pt_single_2_1024: 0.32
niah_pt_single_2_1024_stderr: 0.02088234048876172
niah_pt_single_2_2048: 0.374
niah_pt_single_2_2048_stderr: 0.021660710347204473
niah_pt_single_2_4096: 0.37
niah_pt_single_2_4096_stderr: N/A
niah_pt_single_2_alias: " - niah_pt_single_2"
niah_pt_single_3_1024: 0.604
niah_pt_single_3_1024_stderr: 0.021893529941665716
niah_pt_single_3_2048: 0.6
niah_pt_single_3_2048_stderr: 0.02193084412072858
niah_pt_single_3_4096: 0.502
niah_pt_single_3_4096_stderr: N/A
niah_pt_single_3_alias: " - niah_pt_single_3"
ruler_pt_4096: 0.25705757575757576
ruler_pt_4096_stderr: N/A
ruler_pt_alias: ruler_pt
ruler_pt_cwe_1024: 0.5392
ruler_pt_cwe_1024_stderr: 0.01332673717935847
ruler_pt_cwe_2048: 0.321
ruler_pt_cwe_2048_stderr: 0.009951686899430094
ruler_pt_cwe_4096: 0.21059999999999998
ruler_pt_cwe_4096_stderr: N/A
ruler_pt_cwe_alias: " - ruler_pt_cwe"
ruler_pt_fwe_1024: 0.5753333333333333
ruler_pt_fwe_1024_stderr: 0.013569419559555793
ruler_pt_fwe_2048: 0.48533333333333334
ruler_pt_fwe_2048_stderr: 0.012767415662313082
ruler_pt_fwe_4096: 0.47333333333333333
ruler_pt_fwe_4096_stderr: N/A
ruler_pt_fwe_alias: " - ruler_pt_fwe"
ruler_pt_vt_1024: 0.9272
ruler_pt_vt_1024_stderr: 0.008164167272570364
ruler_pt_vt_2048: 0.842
ruler_pt_vt_2048_stderr: 0.00944936935262479
ruler_pt_vt_4096: 0.5812
ruler_pt_vt_4096_stderr: N/A
ruler_pt_vt_alias: " - ruler_pt_vt"

30
special_tokens_map.json Normal file
View File

@@ -0,0 +1,30 @@
{
"bos_token": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<|unk|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

463711
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

397
tokenizer_config.json Normal file
View File

@@ -0,0 +1,397 @@
{
"add_bos_token": false,
"add_eos_token": false,
"add_prefix_space": null,
"added_tokens_decoder": {
"0": {
"content": "<|unk|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49109": {
"content": "<|pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49110": {
"content": "<tools>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49111": {
"content": "</tools>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49112": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49113": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49114": {
"content": "<tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49115": {
"content": "</tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49116": {
"content": "<think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49117": {
"content": "</think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49118": {
"content": "<answer>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49119": {
"content": "</answer>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49120": {
"content": "<context>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49121": {
"content": "</context>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49122": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49123": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49124": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49125": {
"content": "<|repo_name|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49126": {
"content": "<|image|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49127": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49128": {
"content": "<|image_placeholder|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49129": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49130": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49131": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49132": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49133": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49134": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49135": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49136": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49137": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49138": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49139": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49140": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49141": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49142": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49143": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49144": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49145": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49146": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49147": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49148": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49149": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49150": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49151": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<|im_start|>",
"bos_token_id": 1,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"eos_token_id": 2,
"extra_special_tokens": {},
"legacy": false,
"model_input_names": [
"input_ids",
"attention_mask"
],
"model_max_length": 4096,
"pad_token": "<|pad|>",
"pad_token_id": 49109,
"padding_side": "right",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "PreTrainedTokenizerFast",
"truncation_side": "right",
"unk_token": "<|unk|>",
"unk_token_id": 0,
"use_default_system_prompt": false
}

3
train_logs.parquet Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0e501642df6d9e0eaf80010a94d636d82a4c12751c2db4ba019619cc3c1f9b8a
size 1212118

93
training_config.yaml Normal file
View File

@@ -0,0 +1,93 @@
# Directory settings
checkpoint_dir: "/lustre/scratch/data/polyglot_datasets/portuguese/checkpoints/models/Tucano2-qwen-0.5B-Base"
train_dataset_dir:
# Total: ~48B tokens (sum of the per-directory estimates annotated below)
# Web Text (~28B, 58%)
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/5 # 0.1B (PT)
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/4 # 28B (PT)
# Synthetic Text (~20B, 42%)
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2_synth # 10B (PT)
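- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2_synth # 10B (PT); same directory as the entry above, counted twice toward the ~20B synthetic total — NOTE(review): confirm the repeat is intentional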
val_dataset_dir: "/lustre/scratch/data/polyglot_datasets/portuguese/tokenized/validation"
dataset_type: "parquet"
cache_dir: "/lustre/mlnvme/data/nanotronics/.cache"
# Data loading settings
pin_memory: true
num_workers_for_dataloader: 16
shuffle_dataset: true
mask_eos_token: false
mask_pad_token: false
# Model architecture settings
vocab_size: 49152
num_hidden_layers: 28
num_attention_heads: 16
num_key_value_heads: 8
head_dim: 128
hidden_size: 1024
intermediate_size: 3072
max_position_embeddings: 4096
tie_word_embeddings: true
hidden_act: "silu"
output_hidden_states: false
attn_implementation: "flash_attention_2"
use_cache: false
no_rope_layer_interval: null
rope_theta: 1000000.0
rope_scale_factor: null
rms_norm_eps: 0.000001
# Training settings
total_batch_size: 1048576
micro_batch_size: 8
eval_micro_batch_size: 8
num_train_epochs: 1
warmup_steps: 100
max_learning_rate: 0.00025
min_learning_rate: 0.0
muon_learning_rate: null
weight_decay: 0.1
beta1: 0.9
beta2: 0.95
eps: 0.00000001
lr_decay_type: "cosine"
use_sqrt: true
lr_decay_iters_coef: 1.
seed: 42
max_steps: 50000
max_grad_norm: 1.0
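# Precision and optimization settings
# NOTE(review): in PyTorch, float32 matmul precision "highest" requests full-FP32 matmuls (no TF32),
# which appears to conflict with `tf32: true` below — confirm which of the two the training script honors.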
torch_compile: false
mat_mul_precision: "highest"
tf32: true
bf16: true
gradient_checkpointing: false
use_liger_kernel: true
static_graph: false
# Hub settings
push_to_hub: false
hub_token: null
hub_model_id: null
# Tokenizer and Reference model
tokenizer_name_or_path: "/lustre/scratch/data/polyglot_datasets/portuguese/checkpoints/models/tucano2-qwen-0.5B"
chat_template_path: null
reference_model: "/lustre/scratch/data/polyglot_datasets/portuguese/checkpoints/models/tucano2-qwen-0.5B"
continual_pretraining: true
# Checkpoint settings
resume_from_checkpoint: null
checkpointing_steps: 2500
begin_new_stage: true
stage_name: "single_cosine"
# Miscellaneous settings
sanity_check: false
sanity_check_num_samples: 100000
wandb_token: null
wandb_id: "tucano2-qwen-0.5b"
wandb_project: "Polyglot"
wandb_desc: "Developing LLMs for low-resource languages"

3
val_logs.parquet Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6ca1e0a0d7ecb1fa4a37b8173fe006b93dccbf4609fb89a4cedb27a17eb4a2f1
size 1963