初始化项目,由ModelHub XC社区提供模型

Model: Polygl0t/Tucano2-qwen-1.5B-Base
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-07 02:18:18 +08:00
commit 9f2b6e29ef
33 changed files with 465309 additions and 0 deletions

59
.gitattributes vendored Normal file
View File

@@ -0,0 +1,59 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
logo.png filter=lfs diff=lfs merge=lfs -text
benchmarks_hard.png filter=lfs diff=lfs merge=lfs -text
learning_curve.png filter=lfs diff=lfs merge=lfs -text
benchmarks_easy.png filter=lfs diff=lfs merge=lfs -text
gradient_norm.png filter=lfs diff=lfs merge=lfs -text
npm_hard.png filter=lfs diff=lfs merge=lfs -text
performance_vs_compute.png filter=lfs diff=lfs merge=lfs -text
npm_easy.png filter=lfs diff=lfs merge=lfs -text
.plots/arc_challenge.png filter=lfs diff=lfs merge=lfs -text
.plots/before_and_after.png filter=lfs diff=lfs merge=lfs -text
.plots/belebele.png filter=lfs diff=lfs merge=lfs -text
.plots/bluex.png filter=lfs diff=lfs merge=lfs -text
.plots/calame.png filter=lfs diff=lfs merge=lfs -text
.plots/enem.png filter=lfs diff=lfs merge=lfs -text
.plots/global_piqa.png filter=lfs diff=lfs merge=lfs -text
.plots/gradient_norm.png filter=lfs diff=lfs merge=lfs -text
.plots/hellaswag.png filter=lfs diff=lfs merge=lfs -text
.plots/lambada.png filter=lfs diff=lfs merge=lfs -text
.plots/learning_curve.png filter=lfs diff=lfs merge=lfs -text
.plots/mmlu.png filter=lfs diff=lfs merge=lfs -text
.plots/npm_easy.png filter=lfs diff=lfs merge=lfs -text
.plots/npm_hard.png filter=lfs diff=lfs merge=lfs -text
.plots/oab.png filter=lfs diff=lfs merge=lfs -text
.plots/performance_vs_compute.png filter=lfs diff=lfs merge=lfs -text

3
.plots/arc_challenge.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a0515e52aa953710e40ad0d7c8afd53616c7d5e21baca77cb0a6e30b66c698ba
size 214244

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9ef79a4e69550354b943b9afdcdba327e5955efd2936863bdb2cfdd76d55b469
size 274768

3
.plots/belebele.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:81ca953fa216916004f8372239a1fd6a1ab35879708d1adb0fe693260e1637e0
size 207554

3
.plots/bluex.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b8632030c0d198315dbe6a52b28fa212c6ca871723e9483a63ebe385b384519e
size 216448

3
.plots/calame.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:73e2c054134e9839615b56f37abf0a2e5b6ea23e3bfc38860a65243a27edbaf2
size 184143

3
.plots/enem.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9ffafb63234d8ae7e700fefd5aacfca05fb593dd6546fe1d15acd35f4d1aacef
size 216780

3
.plots/global_piqa.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d2198b12ea2f1ec9eee7429c15292ed0eda41c0da7e20e6101c195a40bfb37f1
size 208997

3
.plots/gradient_norm.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:bb8faf475fc449942a938d829b029a508778891308adfbcae4333317f8419302
size 276097

3
.plots/hellaswag.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:3e68abf1294563b9e19ca22fa9eaa2fe3121456232f280521e0d49fa1d3db29c
size 187890

3
.plots/lambada.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:95688a0b987a681e040c92f2981d0e8d0376d6dd603ca3a9af6519508d25e09c
size 190285

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:6049a3d73f84e79cf08c669e6cc97a244c12e5b934e98a6c9201a4cc5b5f0ce3
size 214493

3
.plots/mmlu.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c4bd96a3b6717acaf30748607a49747a62faf4ae9d6261b1551fb6b4698b28d7
size 184938

3
.plots/npm_easy.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4b4519ee5c082058513e4694fd94977100d165e941978871f913c4866ef52794
size 209025

3
.plots/npm_hard.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5d05298ea6f888e4fe93549c05888030b3be66468f07265f46605105d07fd36d
size 227584

3
.plots/oab.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:00d55340f750e295bc1aac92ac529ee04d75b345a7e35a482836694f1c1f5b02
size 210259

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:c4a657d6d68d6f2f1fbf0d3750eac546a7ead054995a466497e0e31bfeedd49c
size 949327

495
README.md Normal file
View File

@@ -0,0 +1,495 @@
---
language:
- pt
license: apache-2.0
library_name: transformers
tags:
- text-generation-inference
datasets:
- Polygl0t/gigaverbo-v2
- Polygl0t/gigaverbo-v2-synth
metrics:
- perplexity
pipeline_tag: text-generation
widget:
- text: "A floresta da Amazônia é conhecida por sua"
example_title: Exemplo
- text: "Uma das coisas que Portugal, Angola, Brasil e Moçambique tem em comum é o"
example_title: Exemplo
- text: "O Carnaval do Rio de Janeiro é"
example_title: Exemplo
inference:
parameters:
repetition_penalty: 1.2
temperature: 0.1
top_k: 50
top_p: 1.0
max_new_tokens: 150
co2_eq_emissions:
emissions: 334000
source: CodeCarbon
training_type: pre-training
geographical_location: Germany
hardware_used: NVIDIA A100-SXM4-80GB
model-index:
- name: Tucano2-qwen-1.5B-Base
results:
- task:
type: text-generation
name: Text Generation
dataset:
name: ARC Challenge
type: Polygl0t/ARC-poly
split: test
args:
num_few_shot: 5
metrics:
- type: acc_norm
value: 48.21
name: Acc-norm
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: arc_challenge_poly_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: HellaSwag
type: Polygl0t/HellaSwag-poly
split: validation
args:
num_few_shot: 5
metrics:
- type: acc_norm
value: 56.25
name: Acc-norm
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: hellaswag_poly_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: Calame
type: Polygl0t/CALAME-PT
split: test
args:
num_few_shot: 5
metrics:
- type: acc
value: 59.06
name: Acc
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: calame_poly_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: Lambada
type: Polygl0t/LAMBADA-poly
split: test
args:
num_few_shot: 5
metrics:
- type: acc
value: 54.2
name: Acc
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: lambada_poly_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: Global PIQA
type: mrlbenchmarks/global-piqa-nonparallel
split: test
args:
num_few_shot: 5
metrics:
- type: acc_norm
value: 77
name: Acc-norm
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: global_piqa_completions_por_latn_braz
- task:
type: text-generation
name: Text Generation
dataset:
name: MMLU
type: Polygl0t/MMLU-poly
split: test
args:
num_few_shot: 5
metrics:
- type: acc
value: 54.04
name: Acc
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: mmlu_poly_pt
- task:
type: text-generation
name: Text Generation
dataset:
name: BELEBELE
type: facebook/belebele
split: test
args:
num_few_shot: 5
metrics:
- type: acc_norm
value: 74
name: Acc-norm
source:
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
name: belebele_por_Latn
- task:
type: text-generation
name: Text Generation
dataset:
name: BLUEX
type: eduagarcia-temp/BLUEX_without_images
split: train
args:
num_few_shot: 3
metrics:
- type: acc
value: 55.91
name: Acc
source:
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
name: bluex
- task:
type: text-generation
name: Text Generation
dataset:
name: ENEM Challenge
type: eduagarcia/enem_challenge
split: train
args:
num_few_shot: 3
metrics:
- type: acc
value: 68.72
name: Acc
source:
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
name: enem_challenge
- task:
type: text-generation
name: Text Generation
dataset:
name: OAB Exams
type: eduagarcia/oab_exams
split: train
args:
num_few_shot: 3
metrics:
- type: acc
value: 48.29
name: Acc
source:
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
name: oab_exams
base_model: Qwen/Qwen3-1.7B-Base
---
# Tucano2-qwen-1.5B-Base
<img src="./logo.png" alt="An illustration of a Tucano bird showing vibrant colors like yellow, orange, blue, green, and black." height="200">
## Model Summary
**[Tucano2-qwen-1.5B-Base](https://huggingface.co/Polygl0t/Tucano2-qwen-1.5B-Base)** is a decoder-only transformer continually pretrained from [Qwen3-1.7B-Base](https://huggingface.co/Qwen/Qwen3-1.7B-Base). Tucano2 is part of the [Polygl0t](https://huggingface.co/Polygl0t) initiative, which aims to advance language models for low-resource languages.
Tucano2-qwen-1.5B-Base shares the same tokenizer as **[Tucano2-0.6B-Base](https://huggingface.co/Polygl0t/Tucano2-0.6B-Base)**. Token embedding transplantation via _Orthogonal Matching Pursuit_ was used to adapt Qwen3-1.7B-Base to be more sensitive to the lexical, morphological, and orthographic properties of Portuguese.
The model was continually pretrained on approximately 100 billion tokens and achieves state-of-the-art performance across several benchmarks designed to evaluate Portuguese language models. **All data, source code, and recipes used to develop the Tucano2 series are open and fully reproducible.**
## Details
- **Architecture:** a Transformer-based model ([`qwen3`](https://huggingface.co/docs/transformers/main/en/model_doc/qwen3))
- **Size:** 1,510,073,344 parameters
- **Context length:** 4,096 tokens
- **Dataset(s):**
- [Polygl0t/gigaverbo-v2](https://huggingface.co/datasets/Polygl0t/gigaverbo-v2)
- [Polygl0t/gigaverbo-v2-synth](https://huggingface.co/datasets/Polygl0t/gigaverbo-v2-synth)
- **Language(s):** Portuguese
- **Batch size:** 1,048,576 tokens
- **Number of steps:** 100,000
- **GPU:** 8 NVIDIA A100-SXM4-80GB
- **Training time**: ~ 215 hours
- **Emissions:** 334 KgCO2 (Germany)
- **Total energy consumption:** 878 kWh
This repository has the [source code](https://github.com/Polygl0t/llm-foundry) used to train this model. The full configuration used for training is available in the following config file:
- Single stage (linear warmup with cosine decay): [training_config.yaml](training_config.yaml)
### Checkpoints
Checkpoints were saved every 2,500 steps, which equates to approximately 2.5 billion tokens. The main branch of this repository contains the final checkpoint saved at step 100000. All other checkpoints are available as separate branches. To load a specific checkpoint, you can use the following code snippet:
```python
from transformers import AutoModelForCausalLM, AutoTokenizer
model_id = "Polygl0t/Tucano2-qwen-1.5B-Base"
revision = "step-2500" # Change this to the desired checkpoint branch
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, revision=revision)
```
Or, you can access all the revisions for the models via the following code snippet:
```python
from huggingface_hub import list_repo_refs
out = list_repo_refs("Polygl0t/Tucano2-qwen-1.5B-Base")
branches = [b.name for b in out.branches]
print(branches)
```
<details>
<summary><b>Learning Curves</b></summary>
![Learning Curves](./.plots/learning_curve.png)
This plot illustrates the evolution of model performance (measured by loss) as a function of training time, measured in tokens seen during training.
</details>
<details>
<summary><b>Gradient Norms (L2)</b></summary>
![Gradient Norms](./.plots/gradient_norm.png)
This plot illustrates the evolution of gradient norms as a function of training time, measured in tokens seen during training.
</details>
## Intended Uses
The primary intended use of Tucano2-qwen-1.5B-Base is to serve as a foundation for research and development involving Portuguese language modeling. Checkpoints saved during training are designed to provide a controlled setting for performing comparative experiments, specifically regarding the effects of continual pretraining on the performance of currently available benchmarks. You may also fine-tune and adapt Tucano2-qwen-1.5B-Base for deployment if your use follows the Apache 2.0 license. If you decide to use Tucano2-qwen-1.5B-Base as a basis for your fine-tuned model, please conduct your own risk and bias assessment.
## Out-of-scope Use
- Tucano2-qwen-1.5B-Base is **not intended for deployment**. It is not an out-of-the-box product and should not be used for human-facing interactions.
- Tucano2-qwen-1.5B-Base is for **the Portuguese language only** and is unsuitable for text generation tasks in other languages.
- Tucano2-qwen-1.5B-Base has **not been fine-tuned** for downstream tasks.
## Basic usage
```python
from transformers import GenerationConfig, TextGenerationPipeline, AutoTokenizer, AutoModelForCausalLM
import torch
# Specify the model and tokenizer
model_id = "Polygl0t/Tucano2-qwen-1.5B-Base"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
# Specify the generation parameters as you like
generation_config = GenerationConfig(
**{
"do_sample": True,
"max_new_tokens": 150,
"renormalize_logits": True,
"repetition_penalty": 1.2,
"temperature": 0.1,
"top_k": 50,
"top_p": 1.0,
"use_cache": True,
}
)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
generator = TextGenerationPipeline(model=model, task="text-generation", tokenizer=tokenizer, device=device)
# Generate text
prompt = "# A floresta da Amazônia: um lugar de Magia\n\n"
completion = generator(prompt, generation_config=generation_config)
print(completion[0]['generated_text'])
```
## Limitations
Like almost all other language models trained on large text datasets scraped from the web, Tucano2-qwen-1.5B-Base shows behavior that does not make it an out-of-the-box solution for many real-world applications, especially those requiring factual, reliable, and nontoxic text generation. Tucano2-qwen-1.5B-Base is subject to the following:
- **Hallucinations:** Tucano2-qwen-1.5B-Base can produce content that can be mistaken as facts, but is misleading or entirely false, i.e., hallucinations.
- **Biases and Toxicity:** Tucano2-qwen-1.5B-Base inherits the social and historical stereotypes from the data used to train it. Given these biases, the model can produce toxic content, i.e., harmful, offensive, or detrimental to individuals, groups, or communities.
- **Language Limitations:** Tucano2-qwen-1.5B-Base is primarily designed to interact with Portuguese. Other languages might challenge its comprehension, leading to potential misinterpretations or errors in response.
- **Repetition and Verbosity:** Tucano2-qwen-1.5B-Base may get stuck in repetition loops (especially if the repetition penalty during generation is set too low) or produce verbose responses unrelated to the prompt it was given.
Hence, even though Tucano2-qwen-1.5B-Base is released under a permissive license, we urge users to perform their own risk analysis before using it for real-world applications.
## Evaluations
The table below compares the Tucano2 series against other base models of similar size. We divide our evaluations into two sets:
- **Easy Set**: CALAME, GlobalPIQA, LAMBADA, ARC-Challenge, HellaSwag
- **Hard Set**: ENEM, BLUEX, OAB Exams, BELEBELE, MMLU
The NPM (Normalized Performance Metric) provides a balanced view of model performance across tasks, accounting for each task's inherent difficulty by normalizing its evaluation score relative to its random baseline.
| | Total Avg. | Easy Set (NPM) | Hard Set (NPM) |
| -------------------------- | ---------- | -------------- | -------------- |
| **Tucano2-qwen-3.7B-Base** | 59.21 | 57.41 | 61 |
| Qwen2.5-7B | 57.97 | 54.12 | 61.83 |
| Qwen3-4B-Base | 57.86 | 52.52 | 63.2 |
| SmolLM3-3B-Base | 50.25 | 54.06 | 46.44 |
| Qwen2.5-3B | 50.16 | 47.69 | 52.62 |
| **Tucano2-qwen-1.5B-Base** | 47.9 | 47.97 | 47.82 |
| Curio-edu-7b | 45.66 | 57.46 | 33.87 |
| Qwen3-1.7B-Base | 44.48 | 40.94 | 48.03 |
| Curio-7b | 42.79 | 58.97 | 26.6 |
| Llama-3.2-3B | 40.5 | 43.79 | 37.21 |
| granite-3.3-2b-base | 39.97 | 45.31 | 34.63 |
| **Tucano2-qwen-0.5B-Base** | 35.36 | 39.93 | 30.79 |
| Qwen3-0.6B-Base | 29.4 | 26.41 | 32.38 |
| Llama-2-7b-hf | 29.36 | 42.69 | 16.03 |
| **Tucano2-0.6B-Base** | 20.64 | 40.28 | 0.99 |
| Qwen2.5-0.5B | 19.89 | 18.7 | 21.09 |
| Curio-1.1b | 19.23 | 39.16 | -0.69 |
| Tucano-2b4 | 17.88 | 33.55 | 2.2 |
| Curio-edu-1b1 | 17.72 | 34.77 | 0.67 |
| Llama-3.2-1B | 16.57 | 28.32 | 4.83 |
| Tucano-1b1 | 15.44 | 29.12 | 1.76 |
| Tucano-630m | 14.9 | 26.99 | 2.8 |
| Carvalho_pt-gl-1.3B | 12.54 | 26.75 | -1.66 |
| TeenyTinyLlama-460m | 11.18 | 19.65 | 2.72 |
| Tucano-160m | 8.78 | 19.12 | -1.56 |
| TeenyTinyLlama-160m | 7.72 | 15.75 | -0.31 |
| GlorIA-1.3B | 5.93 | 27.27 | -15.42 |
<details>
<summary><b>Evaluation Suite</b></summary>
| **Benchmark** | **n-shot** | **Type** | **Baseline** | **Metric** |
| --------------- | ---------- | ------------------ | ------------ | ---------- |
| **Easy Set** | | | | |
| CALAME | 5-shot | Completion | 0 | `acc` |
| GlobalPIQA | 5-shot | Completion | 50 | `acc_norm` |
| LAMBADA | 5-shot | Completion | 0 | `acc` |
| ARC-Challenge | 5-shot | MC-Q&A | 25 | `acc_norm` |
| HellaSwag | 5-shot | Completion | 25 | `acc_norm` |
| **Hard Set** | | | | |
| ENEM           | 3-shot     | MC-Q&A             | 20           | `acc` |
| BLUEX           | 3-shot     | MC-Q&A             | 22.5         | `acc` |
| OAB Exams       | 3-shot     | MC-Q&A             | 25           | `acc` |
| BELEBELE | 5-shot | MC-Q&A | 25 | `acc_norm` |
| MMLU | 5-shot | MC-Q&A | 25 | `acc` |
</details>
<details>
<summary><b>Individual Benchmarks</b></summary>
| | BLUEX | ENEM | OAB | ARC Challenge | BELEBELE | CALAME | Global PIQA | HellaSwag | LAMBADA | MMLU |
| -------------------------- | ----- | ----- | ----- | ------------- | -------- | ------ | ----------- | --------- | ------- | ----- |
| **Tucano2-qwen-3.7B-Base** | 66.2 | 77.54 | 58.45 | 57.78 | 83.67 | 61.08 | 83 | 65.32 | 62.53 | 65.4 |
| Qwen2.5-7B | 65.92 | 75.02 | 55.03 | 54.19 | 89.67 | 58.96 | 78 | 67.92 | 59.52 | 68.55 |
| Qwen3-4B-Base | 69.96 | 77.61 | 55.58 | 54.53 | 87.89 | 57.95 | 77 | 63.19 | 60.37 | 68.59 |
| SmolLM3-3B-Base | 54.52 | 61.37 | 45.51 | 51.37 | 77.67 | 59.15 | 81 | 65.57 | 59.89 | 56.19 |
| Qwen2.5-3B | 58.28 | 67.32 | 50.34 | 45.21 | 83.22 | 58.38 | 75 | 59.44 | 57.17 | 59.79 |
| **Tucano2-qwen-1.5B-Base** | 55.91 | 68.72 | 48.29 | 48.21 | 74 | 59.06 | 77 | 56.25 | 54.2 | 54.04 |
| Curio-edu-7b | 47.15 | 58.64 | 43.78 | 50.94 | 53 | 60.79 | 86 | 66.48 | 64.62 | 45.14 |
| Qwen3-1.7B-Base | 57.16 | 65.22 | 45.79 | 47.18 | 77.89 | 53.56 | 67 | 52.55 | 50.81 | 55.49 |
| Curio-7b | 43.39 | 50.59 | 39.68 | 48.03 | 45.33 | 63.44 | 89 | 67.58 | 65.94 | 40.83 |
| Llama-3.2-3B | 50.35 | 53.04 | 39.45 | 41.11 | 68.89 | 54.48 | 69 | 59.14 | 59.48 | 48.28 |
| granite-3.3-2b-base | 45.34 | 54.02 | 39.54 | 41.37 | 65.67 | 58.77 | 70 | 60.81 | 58.22 | 45.63 |
| **Tucano2-qwen-0.5B-Base** | 46.87 | 55.14 | 40.36 | 37.44 | 53.89 | 58.67 | 74 | 48.43 | 45.14 | 39.68 |
| Qwen3-0.6B-Base | 42.98 | 49.48 | 40.46 | 36.92 | 65 | 45.95 | 54 | 40.33 | 41.78 | 43.54 |
| Llama-2-7b-hf | 31.29 | 31.77 | 35.49 | 42.14 | 41.44 | 54.53 | 67 | 56.76 | 59.73 | 38.64 |
| **Tucano2-0.6B-Base** | 21.14 | 23.58 | 23.28 | 37.01 | 26.22 | 57.61 | 79 | 47.74 | 39.45 | 27.18 |
| Qwen2.5-0.5B | 32.55 | 38.91 | 35.9 | 28.46 | 49.56 | 44.89 | 44 | 37.7 | 39.08 | 41.17 |
| Curio-1.1b | 21.56 | 21.06 | 23.1 | 30.43 | 22.89 | 59.25 | 75 | 49.45 | 46.69 | 26.35 |
| Tucano-2b4 | 25.45 | 21.62 | 26.74 | 30.43 | 25.89 | 50.34 | 73 | 48.85 | 32.39 | 26.24 |
| Curio-edu-1b1 | 23.5 | 19.87 | 25.01 | 32.22 | 26.22 | 54.91 | 69 | 46.3 | 42.93 | 25.43 |
| Llama-3.2-1B | 24.06 | 23.93 | 26.06 | 31.71 | 33.33 | 50 | 55 | 45.27 | 45.6 | 28.51 |
| Tucano-1b1 | 25.45 | 21.55 | 26.38 | 30.09 | 25.67 | 48.94 | 68 | 44.1 | 28.43 | 25.26 |
| Tucano-630m | 26.7 | 21.69 | 26.92 | 28.72 | 27.33 | 47.3 | 68 | 40.37 | 26.2 | 25.6 |
| Carvalho_pt-gl-1.3B | 19.33 | 18.12 | 22.32 | 27.01 | 26.44 | 53.42 | 63 | 38.53 | 33.59 | 24.82 |
| TeenyTinyLlama-460m | 25.87 | 20.15 | 27.02 | 27.35 | 28.11 | 42.49 | 59 | 34.81 | 21.56 | 26.65 |
| Tucano-160m | 24.76 | 20.57 | 17.22 | 25.56 | 23.44 | 43.59 | 59 | 33.73 | 21.64 | 25.77 |
| TeenyTinyLlama-160m | 22.53 | 18.89 | 22.32 | 24.02 | 26.78 | 39.79 | 58 | 29.89 | 17.74 | 25.74 |
| GlorIA-1.3B | 4.31 | 2.52 | 4.69 | 26.41 | 22.78 | 54.67 | 64 | 36.35 | 36.68 | 23.69 |
</details>
## Performance and Compute
Below, we display the performance of Tucano2-qwen-1.5B-Base across all benchmarks in our evaluation suite. Tucano2-qwen-1.5B-Base is compared with Qwen3-1.7B-Base, the base model from which it was continually pretrained. The percentage variation in performance is displayed in terms of the difference in evaluation scores between the Base and the Continually Pretrained model.
All individual benchmark scores and their evolution across training time can be found in the [.plots](https://huggingface.co/Polygl0t/Tucano2-qwen-1.5B-Base/tree/main/.plots/) folder.
**Before and After Continual Pretraining**
![Performance Before and After Continual Pretraining](./.plots/before_and_after.png)
This plot compares the compute requirements (measured as C = 6 \* N \* D, where N is the number of parameters and D is the number of tokens processed) against the performance of each model (measured by the NPM score).
![NPM vs Compute](./.plots/performance_vs_compute.png)
<details>
<summary><b>Performance and Compute Details</b></summary>
| | Parameters (B) | Pretraining Tokens (B) | Continual Pretraining Tokens (B) | Total Tokens (B) | Pretraining Compute (FLOPs) | Continual Pretraining Compute (FLOPs) | Total Compute (FLOPs) | NPM Score |
|----------------------------|----------------|------------------------|----------------------------------|------------------|-----------------------------|---------------------------------------|-----------------------|-----------|
| **Tucano2-qwen-3.7B-Base** | 3.7 | 36000 | 50 | 36050 | 8.64e+23 | 1.11e+21 | 8.65e+23 | 59.2 |
| Qwen2.5-7B | 7 | 18000 | - | 18000 | 7.56e+23 | - | 7.56e+23 | 57.97 |
| Qwen3-4B-Base | 4 | 36000 | - | 36000 | 8.64e+23 | - | 8.64e+23 | 57.86 |
| SmolLM3-3B-Base | 3 | 11200 | - | 11200 | 2.02e+23 | - | 2.02e+23 | 50.25 |
| Qwen2.5-3B | 3 | 18000 | - | 18000 | 3.24e+23 | - | 3.24e+23 | 50.15 |
| **Tucano2-qwen-1.5B-Base** | 1.5 | 36000 | 100 | 36100 | 3.67e+23 | 9e+20 | 3.68e+23 | 47.89 |
| Curio-edu-7b | 7 | 2000 | 20 | 2020 | 8.4e+22 | 8.4e+20 | 8.48e+22 | 45.66 |
| Qwen3-1.7B-Base | 1.7 | 36000 | - | 36000 | 3.67e+23 | - | 3.67e+23 | 44.48 |
| Curio-7b | 7 | 2000 | 150 | 2150 | 8.4e+22 | 6.3e+21 | 9.03e+22 | 42.78 |
| Llama-3.2-3B | 3 | 9000 | - | 9000 | 1.62e+23 | - | 1.62e+23 | 40.5 |
| granite-3.3-2b-base | 2 | 12000 | - | 12000 | 1.44e+23 | - | 1.44e+23 | 39.96 |
| **Tucano2-qwen-0.5B-Base** | 0.5 | 36000 | 50 | 36050 | 1.3e+23 | 1.5e+20 | 1.3e+23 | 35.35 |
| Qwen3-0.6B-Base | 0.6 | 36000 | - | 36000 | 1.3e+23 | - | 1.3e+23 | 29.39 |
| Llama-2-7b-hf | 7 | 2000 | - | 2000 | 8.4e+22 | - | 8.4e+22 | 29.36 |
| **Tucano2-0.6B-Base** | 0.6 | 408 | - | 408 | 1.47e+21 | - | 1.47e+21 | 20.63 |
| Qwen2.5-0.5B | 0.5 | 18000 | - | 18000 | 5.4e+22 | - | 5.4e+22 | 19.89 |
| Curio-1.1b | 1.1 | 1000 | 150 | 1150 | 6.6e+21 | 9.9e+20 | 7.59e+21 | 19.23 |
| Tucano-2b4 | 2.4 | 515 | - | 515 | 7.42e+21 | - | 7.42e+21 | 17.87 |
| Curio-edu-1b1 | 1.1 | 1000 | 20 | 1020 | 6.6e+21 | 1.32e+20 | 6.73e+21 | 17.72 |
| Llama-3.2-1B | 1 | 9000 | - | 9000 | 5.4e+22 | - | 5.4e+22 | 16.57 |
| Tucano-1b1 | 1.1 | 250 | - | 250 | 1.65e+21 | - | 1.65e+21 | 15.44 |
| Tucano-630m | 0.63 | 211 | - | 211 | 7.98e+20 | - | 7.98e+20 | 14.89 |
| Carvalho_pt-gl-1.3B | 1.3 | 26 | 5 | 31 | 2.03e+20 | 3.9e+19 | 2.42e+20 | 12.54 |
| TeenyTinyLlama-460m | 0.46 | 6.2 | - | 6.2 | 1.71e+19 | - | 1.71e+19 | 11.18 |
| Tucano-160m | 0.16 | 169 | - | 169 | 1.62e+20 | - | 1.62e+20 | 8.78 |
| TeenyTinyLlama-160m | 0.16 | 6.2 | - | 6.2 | 5.95e+18 | - | 5.95e+18 | 7.71 |
| GlorIA-1.3B | 1.3 | 35 | - | 35 | 2.73e+20 | - | 2.73e+20 | 5.92 |
</details>
## Cite as 🤗
```latex
@misc{correa2026tucano2cool,
title={{Tucano 2 Cool: Better Open Source LLMs for Portuguese}},
author={Nicholas Kluge Corr{\^e}a and Aniket Sen and Shiza Fatimah and Sophia Falk and Lennard Landgraf and Julia Kastner and Lucie Flek},
year={2026},
eprint={2603.03543},
archivePrefix={arXiv},
primaryClass={cs.CL},
url={https://arxiv.org/abs/2603.03543},
}
```
## Acknowledgments
Polyglot is a project funded by the Federal Ministry of Education and Research (BMBF) and the Ministry of Culture and Science of the State of North Rhine-Westphalia (MWK) as part of TRA Sustainable Futures (University of Bonn) and the Excellence Strategy of the federal and state governments.
We also gratefully acknowledge the granted access to the [Marvin cluster](https://www.hpc.uni-bonn.de/en/systems/marvin) hosted by [University of Bonn](https://www.uni-bonn.de/en) along with the support provided by its High Performance Computing & Analytics Lab.
## License
Tucano2-qwen-1.5B-Base is licensed under the Apache License, Version 2.0. For more details, see the [LICENSE](LICENSE) file.

62
config.json Normal file
View File

@@ -0,0 +1,62 @@
{
"architectures": [
"Qwen3ForCausalLM"
],
"attention_bias": false,
"attention_dropout": 0.0,
"bos_token_id": 1,
"dtype": "bfloat16",
"eos_token_id": 2,
"head_dim": 128,
"hidden_act": "silu",
"hidden_size": 2048,
"initializer_range": 0.02,
"intermediate_size": 6144,
"layer_types": [
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention",
"full_attention"
],
"max_position_embeddings": 4096,
"max_window_layers": 28,
"model_type": "qwen3",
"num_attention_heads": 16,
"num_hidden_layers": 28,
"num_key_value_heads": 8,
"pad_token_id": 49109,
"rms_norm_eps": 1e-06,
"rope_scaling": null,
"rope_theta": 1000000,
"sliding_window": null,
"tie_word_embeddings": true,
"torch_dtype": "bfloat16",
"transformers_version": "4.53.2",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 49152
}

43
emissions.csv Normal file
View File

@@ -0,0 +1,43 @@
timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
2026-01-04T00:53:29,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,1672.0128981360467,0.3616482249474259,0.0002162951167126,45.019915875,484.7646790573522,70.0,0.0201888648940545,0.897753190146414,0.0313904687747983,0.9493325238152668,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-04T05:49:41,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,19444.25734276499,4.2291008795863885,0.0002174987095179,45.019199025000006,482.7655568575297,70.0,0.2348000245067472,10.501583710704685,0.3650754578228039,11.101459193034229,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-04T11:11:30,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,38753.11218811601,8.43492817140923,0.0002176580846065,45.01831363714286,484.5844171455976,70.0,0.4679794979088908,20.94621371362422,0.7276316248212911,22.14182483635445,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-04T16:33:27,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,58069.61899157299,12.637987434252608,0.0002176351016886,45.01415124000002,484.63982451788416,70.0,0.7012435854164667,31.38336156972471,1.0903192030491542,33.17492435819033,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-04T21:55:15,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,77377.71302408102,16.8434678120622,0.0002176785427455,45.020718261,472.78786459711847,70.0,0.934410551153674,41.82711327666385,1.452855518716594,44.214379346534194,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-05T03:17:01,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,96684.46855293802,21.04547828219133,0.0002176717584238,45.01916109,480.4906651352453,70.0,1.1675632860385523,52.26179285662305,1.8153696276265392,55.2447257702883,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-05T08:38:49,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,115991.63376416505,25.247856532391783,0.0002176696345507,45.022424445000006,487.6202874492108,70.0,1.400724185860845,62.69741657400424,2.177896862898236,66.27603762276357,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-05T14:00:35,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,135298.054422079,29.44756302094976,0.0002176495674437,45.01929460500001,486.7508296644243,70.0,1.633874360692557,73.12605403524107,2.5404076611594792,77.30033605709347,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-05T19:22:23,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,154606.302445931,33.650672001514444,0.0002176539472786,45.01864712571429,481.2120431259584,70.0,1.867044639775351,83.56357299774962,2.9029484512387627,88.33356608876346,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-06T00:44:09,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,173912.178886339,37.85256180611232,0.0002176533124275,45.01715610375,483.6554985866221,70.0,2.100189815705816,93.99795461913716,3.265451328675393,99.36359576351836,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-06T06:05:54,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,193217.17706548003,42.05097558628693,0.0002176358035291,45.01952937000001,484.7059326018373,70.0,2.33332110738652,104.42324762964276,3.627932080104468,110.38450081713329,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-06T11:27:47,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,212530.15787969803,46.25285688778893,0.0002176296171293,45.02398519038461,808.9535299055012,70.0,2.5665519780566304,114.85738787333752,3.9905683197285895,121.414508171122,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-06T16:49:35,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,231837.855038024,50.45379269585461,0.0002176253428827,0.0,535.4665927768112,70.0,2.7997165445120697,125.28892996300908,4.353387081808224,132.44203358932828,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-06T22:11:28,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,251150.89791434805,54.65462820015403,0.0002176166944017,45.01846470214287,485.30602055119977,70.0,3.0329467619813424,135.7203271267309,4.716021819789988,143.4692957085025,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-07T03:33:36,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,270479.23990876006,58.856623712839706,0.000217601261127,45.02421498375001,665.4842286309712,70.0,3.266363310468128,146.1542949405653,5.078944617688521,154.49960286872215,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-07T08:55:22,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,289785.181663043,63.05984593456516,0.0002176089390515,45.01917849461539,868.9230926440835,70.0,3.4995023270463355,156.5921901961529,5.441437637097453,165.53313016029705,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-07T14:17:09,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,309092.21288732707,67.25836504844831,0.0002175996749324,0.0,0.0,70.0,3.732656094991943,167.01742023355234,5.804235388080284,176.55431171662505,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-07T19:38:58,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,328400.79333176,71.46097276904095,0.0002176029236837,45.01939423500001,479.4653427247581,70.0,3.965831424019364,177.45360994638622,6.166784562239569,187.58622593264457,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-08T01:00:47,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,347709.7525634171,75.66309031421144,0.0002176041648426,45.019186976250005,481.31254917203177,70.0,4.199013129390241,187.8884982006786,6.529342100069741,198.61685343013897,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-08T06:22:36,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,367019.01668525406,79.86450018570841,0.0002176031664707,45.02422620000001,547.7264625280638,70.0,4.432194094217369,198.32152950654145,6.89189967187149,209.64562327263,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-08T11:44:30,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,386333.212516612,84.06487647912063,0.0002175968147587,45.02229371100001,483.01274136565706,70.0,4.665430583328959,208.7517054509535,7.254543921410093,220.67167995569136,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-08T17:06:16,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,405639.152044852,88.2633213389953,0.0002175907352484,45.021496023000005,483.6714764025161,70.0,4.89857015528936,219.1770576740061,7.617038764733076,231.69266659402888,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-08T22:28:05,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,424947.72908463504,92.46002132998896,0.0002175797515829,45.0169352775,478.339030320535,70.0,5.131743782461101,229.5977411369355,7.97958800353076,242.7090729229268,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-09T03:49:56,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,444259.0430381961,96.65982774527176,0.0002175753746828,45.02668143681819,634.4243733501488,70.0,5.3649481096869005,240.0265014163263,8.34218414053561,253.7336336665488,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-09T09:11:50,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,463572.824956201,100.86067552519698,0.0002175724505307,45.01943568000001,484.4552802111328,70.0,5.598176539522024,250.45793349952004,8.70481797040043,264.7609280094421,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-09T14:33:51,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,482894.124817878,105.0652181909104,0.000217574024597,45.017861946428575,482.8041853022498,70.0,5.831504167079657,260.8988112641604,9.067606056909732,275.7979214881491,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-09T19:55:46,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,502208.942610628,109.27042377206406,0.0002175796058191,45.0263263307143,908.9151128213846,70.0,6.06475756903236,271.3413464465148,9.430551115216456,286.8366551307627,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-10T01:17:37,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,521519.596273953,113.4744512965224,0.0002175842520726,45.02019154500001,481.5145843114581,70.0,6.297954221678273,281.78120735561833,9.79313477792758,297.8722963552235,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-10T06:39:24,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,540827.3727985891,117.67560737074152,0.0002175844147122,45.01754935875,482.4771736798942,70.0,6.531114639119297,292.2136213551536,10.155663981423242,308.9003999756964,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-10T12:01:12,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,560135.153515207,121.87618752281664,0.0002175835363268,45.01880951625,482.430054489862,70.0,6.764277945929993,302.6445168115299,10.5181970336314,319.9269917910924,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-10T17:23:02,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,579444.889759472,126.07697126437505,0.0002175823335273,45.01756854,507.0251537355392,70.0,6.997466515479824,313.0758827285595,10.88076878818248,330.9541180322222,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-10T22:44:50,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,598752.613904556,130.27444202106778,0.0002175764063417,45.01916073409091,692.3847170751268,70.0,7.230630347631746,323.49861499646056,11.243302289631842,341.97254763372564,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-11T20:22:21,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,19694.33545038849,4.140420121964514,0.0002102340610778,45.02229774230769,715.6152593669873,70.0,0.2379094903649518,10.260946260417144,0.3698149985669284,10.868670749349032,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-12T01:41:11,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,38823.88746571168,8.259854018045177,0.0002127518534907,45.02043981,373.17769863629735,70.0,0.4689248080536968,20.48432003716533,0.7290074662264722,21.68225231144553,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-12T07:00:02,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,57955.124901412055,12.37992186382978,0.0002136122022839,45.01808452285714,385.4241548433552,70.0,0.699966693433385,30.709290225190443,1.088241083291844,32.49749800191568,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-12T12:18:56,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,77088.85968300328,16.49728420820384,0.0002140034795694,45.02558758846154,759.4654963242226,70.0,0.931039507515274,40.92707902608152,1.4475231731988447,43.30564170679575,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-12T17:37:53,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,96225.60899510048,20.61482729975355,0.0002142343136617,45.01869930000001,430.06255801322726,70.0,1.1621392468993312,51.145273313686005,1.8068473153918945,54.1142598759773,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-12T22:56:47,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,115360.29743119702,24.732333252545853,0.0002143920725178,45.025926885,380.7974883712888,70.0,1.3932172625401595,61.36342596820157,2.1661373245693256,64.92278055531133,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-13T04:15:45,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,134497.9721878525,28.847225996051005,0.0002144807503548,45.033311655000006,387.6305330294473,70.0,1.6243354261510627,71.57461952076096,2.525486570480943,75.72444151739337,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-13T09:34:46,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,153638.7588795405,32.96794933170222,0.0002145809402011,45.02050812000001,379.04621907408807,70.0,1.855488253191034,81.80102649354677,2.8848931327789025,86.54140787951758,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-13T14:53:34,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,172767.15908273123,37.08209324325416,0.0002146362389711,45.022813326000005,378.2701073055912,70.0,2.086489789305811,92.01055236393864,3.2440609921913874,97.34110314543685,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
2026-01-13T14:53:42,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,172775.305582162,37.08372771929324,0.0002146355788192101,45.01390842,1795.2327740678572,70.0,2.0865850073798553,92.01459960217636,3.2442090622980118,97.3453936718552,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
1 timestamp project_name run_id experiment_id duration emissions emissions_rate cpu_power gpu_power ram_power cpu_energy gpu_energy ram_energy energy_consumed country_name country_iso_code region cloud_provider cloud_region os python_version codecarbon_version cpu_count cpu_model gpu_count gpu_model longitude latitude ram_total_size tracking_mode on_cloud pue
2 2026-01-04T00:53:29 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 1672.0128981360467 0.3616482249474259 0.0002162951167126 45.019915875 484.7646790573522 70.0 0.0201888648940545 0.897753190146414 0.0313904687747983 0.9493325238152668 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
3 2026-01-04T05:49:41 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 19444.25734276499 4.2291008795863885 0.0002174987095179 45.019199025000006 482.7655568575297 70.0 0.2348000245067472 10.501583710704685 0.3650754578228039 11.101459193034229 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
4 2026-01-04T11:11:30 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 38753.11218811601 8.43492817140923 0.0002176580846065 45.01831363714286 484.5844171455976 70.0 0.4679794979088908 20.94621371362422 0.7276316248212911 22.14182483635445 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
5 2026-01-04T16:33:27 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 58069.61899157299 12.637987434252608 0.0002176351016886 45.01415124000002 484.63982451788416 70.0 0.7012435854164667 31.38336156972471 1.0903192030491542 33.17492435819033 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
6 2026-01-04T21:55:15 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 77377.71302408102 16.8434678120622 0.0002176785427455 45.020718261 472.78786459711847 70.0 0.934410551153674 41.82711327666385 1.452855518716594 44.214379346534194 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
7 2026-01-05T03:17:01 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 96684.46855293802 21.04547828219133 0.0002176717584238 45.01916109 480.4906651352453 70.0 1.1675632860385523 52.26179285662305 1.8153696276265392 55.2447257702883 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
8 2026-01-05T08:38:49 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 115991.63376416505 25.247856532391783 0.0002176696345507 45.022424445000006 487.6202874492108 70.0 1.400724185860845 62.69741657400424 2.177896862898236 66.27603762276357 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
9 2026-01-05T14:00:35 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 135298.054422079 29.44756302094976 0.0002176495674437 45.01929460500001 486.7508296644243 70.0 1.633874360692557 73.12605403524107 2.5404076611594792 77.30033605709347 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
10 2026-01-05T19:22:23 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 154606.302445931 33.650672001514444 0.0002176539472786 45.01864712571429 481.2120431259584 70.0 1.867044639775351 83.56357299774962 2.9029484512387627 88.33356608876346 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
11 2026-01-06T00:44:09 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 173912.178886339 37.85256180611232 0.0002176533124275 45.01715610375 483.6554985866221 70.0 2.100189815705816 93.99795461913716 3.265451328675393 99.36359576351836 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
12 2026-01-06T06:05:54 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 193217.17706548003 42.05097558628693 0.0002176358035291 45.01952937000001 484.7059326018373 70.0 2.33332110738652 104.42324762964276 3.627932080104468 110.38450081713329 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
13 2026-01-06T11:27:47 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 212530.15787969803 46.25285688778893 0.0002176296171293 45.02398519038461 808.9535299055012 70.0 2.5665519780566304 114.85738787333752 3.9905683197285895 121.414508171122 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
14 2026-01-06T16:49:35 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 231837.855038024 50.45379269585461 0.0002176253428827 0.0 535.4665927768112 70.0 2.7997165445120697 125.28892996300908 4.353387081808224 132.44203358932828 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
15 2026-01-06T22:11:28 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 251150.89791434805 54.65462820015403 0.0002176166944017 45.01846470214287 485.30602055119977 70.0 3.0329467619813424 135.7203271267309 4.716021819789988 143.4692957085025 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
16 2026-01-07T03:33:36 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 270479.23990876006 58.856623712839706 0.000217601261127 45.02421498375001 665.4842286309712 70.0 3.266363310468128 146.1542949405653 5.078944617688521 154.49960286872215 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
17 2026-01-07T08:55:22 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 289785.181663043 63.05984593456516 0.0002176089390515 45.01917849461539 868.9230926440835 70.0 3.4995023270463355 156.5921901961529 5.441437637097453 165.53313016029705 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
18 2026-01-07T14:17:09 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 309092.21288732707 67.25836504844831 0.0002175996749324 0.0 0.0 70.0 3.732656094991943 167.01742023355234 5.804235388080284 176.55431171662505 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
19 2026-01-07T19:38:58 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 328400.79333176 71.46097276904095 0.0002176029236837 45.01939423500001 479.4653427247581 70.0 3.965831424019364 177.45360994638622 6.166784562239569 187.58622593264457 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
20 2026-01-08T01:00:47 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 347709.7525634171 75.66309031421144 0.0002176041648426 45.019186976250005 481.31254917203177 70.0 4.199013129390241 187.8884982006786 6.529342100069741 198.61685343013897 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
21 2026-01-08T06:22:36 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 367019.01668525406 79.86450018570841 0.0002176031664707 45.02422620000001 547.7264625280638 70.0 4.432194094217369 198.32152950654145 6.89189967187149 209.64562327263 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
22 2026-01-08T11:44:30 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 386333.212516612 84.06487647912063 0.0002175968147587 45.02229371100001 483.01274136565706 70.0 4.665430583328959 208.7517054509535 7.254543921410093 220.67167995569136 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
23 2026-01-08T17:06:16 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 405639.152044852 88.2633213389953 0.0002175907352484 45.021496023000005 483.6714764025161 70.0 4.89857015528936 219.1770576740061 7.617038764733076 231.69266659402888 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
24 2026-01-08T22:28:05 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 424947.72908463504 92.46002132998896 0.0002175797515829 45.0169352775 478.339030320535 70.0 5.131743782461101 229.5977411369355 7.97958800353076 242.7090729229268 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
25 2026-01-09T03:49:56 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 444259.0430381961 96.65982774527176 0.0002175753746828 45.02668143681819 634.4243733501488 70.0 5.3649481096869005 240.0265014163263 8.34218414053561 253.7336336665488 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
26 2026-01-09T09:11:50 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 463572.824956201 100.86067552519698 0.0002175724505307 45.01943568000001 484.4552802111328 70.0 5.598176539522024 250.45793349952004 8.70481797040043 264.7609280094421 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
27 2026-01-09T14:33:51 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 482894.124817878 105.0652181909104 0.000217574024597 45.017861946428575 482.8041853022498 70.0 5.831504167079657 260.8988112641604 9.067606056909732 275.7979214881491 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
28 2026-01-09T19:55:46 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 502208.942610628 109.27042377206406 0.0002175796058191 45.0263263307143 908.9151128213846 70.0 6.06475756903236 271.3413464465148 9.430551115216456 286.8366551307627 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
29 2026-01-10T01:17:37 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 521519.596273953 113.4744512965224 0.0002175842520726 45.02019154500001 481.5145843114581 70.0 6.297954221678273 281.78120735561833 9.79313477792758 297.8722963552235 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
30 2026-01-10T06:39:24 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 540827.3727985891 117.67560737074152 0.0002175844147122 45.01754935875 482.4771736798942 70.0 6.531114639119297 292.2136213551536 10.155663981423242 308.9003999756964 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
31 2026-01-10T12:01:12 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 560135.153515207 121.87618752281664 0.0002175835363268 45.01880951625 482.430054489862 70.0 6.764277945929993 302.6445168115299 10.5181970336314 319.9269917910924 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
32 2026-01-10T17:23:02 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 579444.889759472 126.07697126437505 0.0002175823335273 45.01756854 507.0251537355392 70.0 6.997466515479824 313.0758827285595 10.88076878818248 330.9541180322222 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
33 2026-01-10T22:44:50 Polyglot 18fae7ac-5cb7-4fcf-af97-7261add1c5b3 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 598752.613904556 130.27444202106778 0.0002175764063417 45.01916073409091 692.3847170751268 70.0 7.230630347631746 323.49861499646056 11.243302289631842 341.97254763372564 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
34 2026-01-11T20:22:21 Polyglot f2a62502-6236-4d25-95f8-079b2519fd37 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 19694.33545038849 4.140420121964514 0.0002102340610778 45.02229774230769 715.6152593669873 70.0 0.2379094903649518 10.260946260417144 0.3698149985669284 10.868670749349032 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
35 2026-01-12T01:41:11 Polyglot f2a62502-6236-4d25-95f8-079b2519fd37 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 38823.88746571168 8.259854018045177 0.0002127518534907 45.02043981 373.17769863629735 70.0 0.4689248080536968 20.48432003716533 0.7290074662264722 21.68225231144553 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
36 2026-01-12T07:00:02 Polyglot f2a62502-6236-4d25-95f8-079b2519fd37 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 57955.124901412055 12.37992186382978 0.0002136122022839 45.01808452285714 385.4241548433552 70.0 0.699966693433385 30.709290225190443 1.088241083291844 32.49749800191568 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
37 2026-01-12T12:18:56 Polyglot f2a62502-6236-4d25-95f8-079b2519fd37 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 77088.85968300328 16.49728420820384 0.0002140034795694 45.02558758846154 759.4654963242226 70.0 0.931039507515274 40.92707902608152 1.4475231731988447 43.30564170679575 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
38 2026-01-12T17:37:53 Polyglot f2a62502-6236-4d25-95f8-079b2519fd37 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 96225.60899510048 20.61482729975355 0.0002142343136617 45.01869930000001 430.06255801322726 70.0 1.1621392468993312 51.145273313686005 1.8068473153918945 54.1142598759773 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
39 2026-01-12T22:56:47 Polyglot f2a62502-6236-4d25-95f8-079b2519fd37 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 115360.29743119702 24.732333252545853 0.0002143920725178 45.025926885 380.7974883712888 70.0 1.3932172625401595 61.36342596820157 2.1661373245693256 64.92278055531133 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
40 2026-01-13T04:15:45 Polyglot f2a62502-6236-4d25-95f8-079b2519fd37 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 134497.9721878525 28.847225996051005 0.0002144807503548 45.033311655000006 387.6305330294473 70.0 1.6243354261510627 71.57461952076096 2.525486570480943 75.72444151739337 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
41 2026-01-13T09:34:46 Polyglot f2a62502-6236-4d25-95f8-079b2519fd37 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 153638.7588795405 32.96794933170222 0.0002145809402011 45.02050812000001 379.04621907408807 70.0 1.855488253191034 81.80102649354677 2.8848931327789025 86.54140787951758 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
42 2026-01-13T14:53:34 Polyglot f2a62502-6236-4d25-95f8-079b2519fd37 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 172767.15908273123 37.08209324325416 0.0002146362389711 45.022813326000005 378.2701073055912 70.0 2.086489789305811 92.01055236393864 3.2440609921913874 97.34110314543685 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0
43 2026-01-13T14:53:42 Polyglot f2a62502-6236-4d25-95f8-079b2519fd37 5b0fa12a-3dd7-45bb-9766-cc326314d9f1 172775.305582162 37.08372771929324 0.0002146355788192101 45.01390842 1795.2327740678572 70.0 2.0865850073798553 92.01459960217636 3.2442090622980118 97.3453936718552 Germany DEU north rhine-westphalia Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34 3.12.3 3.0.6 256 AMD EPYC 7713 64-Core Processor 4 4 x NVIDIA A100-SXM4-80GB 7.0932 50.7263 1950 machine N 1.0

190
evals.yaml Normal file
View File

@@ -0,0 +1,190 @@
evaluations:
arc_challenge_poly_pt_acc: 0.44529914529914527
arc_challenge_poly_pt_acc_norm: 0.48205128205128206
arc_challenge_poly_pt_acc_norm_stderr: 0.014614459118720773
arc_challenge_poly_pt_acc_stderr: 0.014536106383401307
arc_challenge_poly_pt_alias: arc_challenge_poly_pt
assin2_rte_acc,all: 0.8766339869281046
assin2_rte_acc_stderr,all: 0.004699176594010998
assin2_rte_alias: assin2_rte
assin2_rte_f1_macro,all: 0.8755544782450612
assin2_rte_f1_macro_stderr,all: 0.004739218474976754
assin2_sts_alias: assin2_sts
assin2_sts_mse,all: 1.0735661764705884
assin2_sts_mse_stderr,all: N/A
assin2_sts_pearson,all: 0.6290850483582386
assin2_sts_pearson_stderr,all: 0.009612669804680212
assin_entailment_acc: 0.708
assin_entailment_acc_stderr: 0.007190057317647597
assin_entailment_alias: assin_entailment
assin_paraphrase_acc: 0.72475
assin_paraphrase_acc_stderr: 0.007062884004258771
assin_paraphrase_alias: assin_paraphrase
belebele_por_Latn_acc: 0.74
belebele_por_Latn_acc_norm: 0.74
belebele_por_Latn_acc_norm_stderr: 0.014629271097998421
belebele_por_Latn_acc_stderr: 0.014629271097998421
belebele_por_Latn_alias: belebele_por_Latn
bluex_acc,all: 0.5591098748261474
bluex_acc,exam_id__UNICAMP_2018: 0.5370370370370371
bluex_acc,exam_id__UNICAMP_2019: 0.6
bluex_acc,exam_id__UNICAMP_2020: 0.509090909090909
bluex_acc,exam_id__UNICAMP_2021_1: 0.6304347826086957
bluex_acc,exam_id__UNICAMP_2021_2: 0.47058823529411764
bluex_acc,exam_id__UNICAMP_2022: 0.6923076923076923
bluex_acc,exam_id__UNICAMP_2023: 0.6511627906976745
bluex_acc,exam_id__UNICAMP_2024: 0.5555555555555556
bluex_acc,exam_id__USP_2018: 0.42592592592592593
bluex_acc,exam_id__USP_2019: 0.4
bluex_acc,exam_id__USP_2020: 0.5535714285714286
bluex_acc,exam_id__USP_2021: 0.6346153846153846
bluex_acc,exam_id__USP_2022: 0.4897959183673469
bluex_acc,exam_id__USP_2023: 0.6136363636363636
bluex_acc,exam_id__USP_2024: 0.6829268292682927
bluex_acc_stderr,all: 0.01069785624296974
bluex_acc_stderr,exam_id__UNICAMP_2018: 0.039296745462938605
bluex_acc_stderr,exam_id__UNICAMP_2019: 0.04014798243504816
bluex_acc_stderr,exam_id__UNICAMP_2020: 0.03888891915912078
bluex_acc_stderr,exam_id__UNICAMP_2021_1: 0.0411282805992433
bluex_acc_stderr,exam_id__UNICAMP_2021_2: 0.04024244267609041
bluex_acc_stderr,exam_id__UNICAMP_2022: 0.04269098796102326
bluex_acc_stderr,exam_id__UNICAMP_2023: 0.041929332285094205
bluex_acc_stderr,exam_id__UNICAMP_2024: 0.04271556020713639
bluex_acc_stderr,exam_id__USP_2018: 0.038960456443585575
bluex_acc_stderr,exam_id__USP_2019: 0.04470992542423865
bluex_acc_stderr,exam_id__USP_2020: 0.03835558472845869
bluex_acc_stderr,exam_id__USP_2021: 0.03851223021094464
bluex_acc_stderr,exam_id__USP_2022: 0.0410194387799713
bluex_acc_stderr,exam_id__USP_2023: 0.04234932088737962
bluex_acc_stderr,exam_id__USP_2024: 0.0418113153523233
bluex_alias: bluex
calame_pt_acc: 0.5905587668593449
calame_pt_acc_stderr: 0.010794891914388602
calame_pt_alias: calame_pt
calame_pt_perplexity: 7.008747913313241
calame_pt_perplexity_stderr: 0.40940358093832135
enem_challenge_acc,all: 0.6871938418474458
enem_challenge_acc,exam_id__2009: 0.6782608695652174
enem_challenge_acc,exam_id__2010: 0.717948717948718
enem_challenge_acc,exam_id__2011: 0.7521367521367521
enem_challenge_acc,exam_id__2012: 0.7068965517241379
enem_challenge_acc,exam_id__2013: 0.6666666666666666
enem_challenge_acc,exam_id__2014: 0.6972477064220184
enem_challenge_acc,exam_id__2015: 0.7058823529411765
enem_challenge_acc,exam_id__2016: 0.6611570247933884
enem_challenge_acc,exam_id__2016_2: 0.6422764227642277
enem_challenge_acc,exam_id__2017: 0.6896551724137931
enem_challenge_acc,exam_id__2022: 0.631578947368421
enem_challenge_acc,exam_id__2023: 0.7037037037037037
enem_challenge_acc_stderr,all: 0.0070891143834158395
enem_challenge_acc_stderr,exam_id__2009: 0.0251403029631727
enem_challenge_acc_stderr,exam_id__2010: 0.02405435432253117
enem_challenge_acc_stderr,exam_id__2011: 0.023038334357693698
enem_challenge_acc_stderr,exam_id__2012: 0.02443296265724745
enem_challenge_acc_stderr,exam_id__2013: 0.02625818903872996
enem_challenge_acc_stderr,exam_id__2014: 0.02543475203567573
enem_challenge_acc_stderr,exam_id__2015: 0.0241011316238719
enem_challenge_acc_stderr,exam_id__2016: 0.02479881231135445
enem_challenge_acc_stderr,exam_id__2016_2: 0.024985945100694615
enem_challenge_acc_stderr,exam_id__2017: 0.024741511708920926
enem_challenge_acc_stderr,exam_id__2022: 0.024112138519174948
enem_challenge_acc_stderr,exam_id__2023: 0.022685440228473772
enem_challenge_alias: enem
faquad_nli_acc,all: 0.7846153846153846
faquad_nli_acc_stderr,all: 0.011396120309131366
faquad_nli_alias: faquad_nli
faquad_nli_f1_macro,all: 0.4396551724137931
faquad_nli_f1_macro_stderr,all: 0.00357969847290883
global_piqa_completions_por_latn_braz_acc: 0.8
global_piqa_completions_por_latn_braz_acc_bytes: 0.77
global_piqa_completions_por_latn_braz_acc_bytes_stderr: 0.042295258468165065
global_piqa_completions_por_latn_braz_acc_norm: 0.77
global_piqa_completions_por_latn_braz_acc_norm_stderr: 0.042295258468165065
global_piqa_completions_por_latn_braz_acc_stderr: 0.04020151261036849
global_piqa_completions_por_latn_braz_alias: global_piqa_completions_por_latn_braz
hatebr_offensive_acc,all: 0.8064285714285714
hatebr_offensive_acc_stderr,all: 0.0074826455677965455
hatebr_offensive_alias: hatebr_offensive_binary
hatebr_offensive_f1_macro,all: 0.801107069296415
hatebr_offensive_f1_macro_stderr,all: 0.007665138669900729
hellaswag_poly_pt_acc: 0.42539820132192
hellaswag_poly_pt_acc_norm: 0.5624661393433742
hellaswag_poly_pt_acc_norm_stderr: 0.005164166461307016
hellaswag_poly_pt_acc_stderr: 0.005146684217488626
hellaswag_poly_pt_alias: hellaswag_poly_pt
lambada_poly_pt_acc: 0.5420143605666602
lambada_poly_pt_acc_stderr: 0.006941341313928234
lambada_poly_pt_alias: lambada_poly_pt
lambada_poly_pt_perplexity: 9.820716308685725
lambada_poly_pt_perplexity_stderr: 0.3120846033602529
mmlu_poly_pt_acc: 0.5403782647853498
mmlu_poly_pt_acc_stderr: 0.004317657624183865
mmlu_poly_pt_alias: mmlu_poly_pt
oab_exams_acc,all: 0.48291571753986334
oab_exams_acc,exam_id__2010-01: 0.4588235294117647
oab_exams_acc,exam_id__2010-02: 0.51
oab_exams_acc,exam_id__2011-03: 0.46464646464646464
oab_exams_acc,exam_id__2011-04: 0.45
oab_exams_acc,exam_id__2011-05: 0.5
oab_exams_acc,exam_id__2012-06: 0.4625
oab_exams_acc,exam_id__2012-06a: 0.525
oab_exams_acc,exam_id__2012-07: 0.5
oab_exams_acc,exam_id__2012-08: 0.4625
oab_exams_acc,exam_id__2012-09: 0.33766233766233766
oab_exams_acc,exam_id__2013-10: 0.525
oab_exams_acc,exam_id__2013-11: 0.525
oab_exams_acc,exam_id__2013-12: 0.525
oab_exams_acc,exam_id__2014-13: 0.475
oab_exams_acc,exam_id__2014-14: 0.5375
oab_exams_acc,exam_id__2014-15: 0.5641025641025641
oab_exams_acc,exam_id__2015-16: 0.5375
oab_exams_acc,exam_id__2015-17: 0.5384615384615384
oab_exams_acc,exam_id__2015-18: 0.4625
oab_exams_acc,exam_id__2016-19: 0.48717948717948717
oab_exams_acc,exam_id__2016-20: 0.45
oab_exams_acc,exam_id__2016-20a: 0.425
oab_exams_acc,exam_id__2016-21: 0.4625
oab_exams_acc,exam_id__2017-22: 0.45
oab_exams_acc,exam_id__2017-23: 0.45
oab_exams_acc,exam_id__2017-24: 0.5
oab_exams_acc,exam_id__2018-25: 0.45
oab_exams_acc_stderr,all: 0.006164493571290463
oab_exams_acc_stderr,exam_id__2010-01: 0.03120711424338333
oab_exams_acc_stderr,exam_id__2010-02: 0.028912621193308535
oab_exams_acc_stderr,exam_id__2011-03: 0.028826912523627856
oab_exams_acc_stderr,exam_id__2011-04: 0.03204801747078995
oab_exams_acc_stderr,exam_id__2011-05: 0.03224202969176272
oab_exams_acc_stderr,exam_id__2012-06: 0.03222923233485234
oab_exams_acc_stderr,exam_id__2012-06a: 0.03229751885191722
oab_exams_acc_stderr,exam_id__2012-07: 0.03239443199904663
oab_exams_acc_stderr,exam_id__2012-08: 0.032144839789965185
oab_exams_acc_stderr,exam_id__2012-09: 0.03103244684042299
oab_exams_acc_stderr,exam_id__2013-10: 0.032222242709920586
oab_exams_acc_stderr,exam_id__2013-11: 0.032249698626176736
oab_exams_acc_stderr,exam_id__2013-12: 0.03225675063294939
oab_exams_acc_stderr,exam_id__2014-13: 0.03217856982922958
oab_exams_acc_stderr,exam_id__2014-14: 0.032246622088818386
oab_exams_acc_stderr,exam_id__2014-15: 0.032435167155658584
oab_exams_acc_stderr,exam_id__2015-16: 0.03223354880595777
oab_exams_acc_stderr,exam_id__2015-17: 0.032573794785528166
oab_exams_acc_stderr,exam_id__2015-18: 0.032123574402475284
oab_exams_acc_stderr,exam_id__2016-19: 0.03271170717682627
oab_exams_acc_stderr,exam_id__2016-20: 0.03203769414642788
oab_exams_acc_stderr,exam_id__2016-20a: 0.031951776527517205
oab_exams_acc_stderr,exam_id__2016-21: 0.03217984644292296
oab_exams_acc_stderr,exam_id__2017-22: 0.03205629372165545
oab_exams_acc_stderr,exam_id__2017-23: 0.03221345216992268
oab_exams_acc_stderr,exam_id__2017-24: 0.03232207361521986
oab_exams_acc_stderr,exam_id__2018-25: 0.03198727711742204
oab_exams_alias: oab_exams
portuguese_hate_speech_acc,all: 0.7297297297297297
portuguese_hate_speech_acc_stderr,all: 0.010749375621571917
portuguese_hate_speech_alias: portuguese_hate_speech_binary
portuguese_hate_speech_f1_macro,all: 0.679463244638342
portuguese_hate_speech_f1_macro_stderr,all: 0.01220967447481398
tweetsentbr_acc,all: 0.7014925373134329
tweetsentbr_acc_stderr,all: 0.007246042251471291
tweetsentbr_alias: tweetsentbr
tweetsentbr_f1_macro,all: 0.6540958473356445
tweetsentbr_f1_macro_stderr,all: 0.007812938746547184
step: 100000

42
evals_all_steps.csv Normal file

File diff suppressed because one or more lines are too long

27
evals_for_comparison.csv Normal file

File diff suppressed because one or more lines are too long

14
generation_config.json Normal file
View File

@@ -0,0 +1,14 @@
{
"bos_token_id": 1,
"eos_token_id": 2,
"pad_token_id": 49109,
"transformers_version": "4.53.2",
"do_sample": true,
"max_new_tokens": 1024,
"renormalize_logits": true,
"repetition_penalty": 1.2,
"temperature": 0.1,
"top_k": 50,
"top_p": 1.0,
"use_cache": false
}

3
logo.png Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:1856d91c3b35390cee5122902d94044657c67df7034ca4005316275c404fc8a0
size 197189

3
model.safetensors Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:5bc9634572194040fe75c7ff1d23e941a22564fadbb000e7f664e3b6cfc8141c
size 3020182248

82
ruler.yaml Normal file
View File

@@ -0,0 +1,82 @@
model_name: Tucano2-qwen-1.5B-Base
results:
niah_pt_multikey_1_1024: 0.516
niah_pt_multikey_1_1024_stderr: 0.022371610982580396
niah_pt_multikey_1_2048: 0.444
niah_pt_multikey_1_2048_stderr: 0.022242244375731048
niah_pt_multikey_1_4096: 0.296
niah_pt_multikey_1_4096_stderr: N/A
niah_pt_multikey_1_alias: " - niah_pt_multikey_1"
niah_pt_multikey_2_1024: 0.064
niah_pt_multikey_2_1024_stderr: 0.01095664621097098
niah_pt_multikey_2_2048: 0.012
niah_pt_multikey_2_2048_stderr: 0.0048743737072649805
niah_pt_multikey_2_4096: 0.024
niah_pt_multikey_2_4096_stderr: N/A
niah_pt_multikey_2_alias: " - niah_pt_multikey_2"
niah_pt_multikey_3_1024: 0.57
niah_pt_multikey_3_1024_stderr: 0.022162634426652866
niah_pt_multikey_3_2048: 0.382
niah_pt_multikey_3_2048_stderr: 0.02175082059125093
niah_pt_multikey_3_4096: 0.254
niah_pt_multikey_3_4096_stderr: N/A
niah_pt_multikey_3_alias: " - niah_pt_multikey_3"
niah_pt_multiquery_1024: 0.162
niah_pt_multiquery_1024_stderr: 0.011325570055151673
niah_pt_multiquery_2048: 0.0385
niah_pt_multiquery_2048_stderr: 0.0059461090155537594
niah_pt_multiquery_4096: 0.0295
niah_pt_multiquery_4096_stderr: N/A
niah_pt_multiquery_alias: " - niah_pt_multiquery"
niah_pt_multivalue_1024: 0.0985
niah_pt_multivalue_1024_stderr: 0.008944579980191584
niah_pt_multivalue_2048: 0.025
niah_pt_multivalue_2048_stderr: 0.004721714643893329
niah_pt_multivalue_4096: 0.0335
niah_pt_multivalue_4096_stderr: N/A
niah_pt_multivalue_alias: " - niah_pt_multivalue"
niah_pt_single_1_1024: 0.066
niah_pt_single_1_1024_stderr: 0.011114633153652916
niah_pt_single_1_2048: 0.058
niah_pt_single_1_2048_stderr: 0.010463793860924815
niah_pt_single_1_4096: 0.058
niah_pt_single_1_4096_stderr: N/A
niah_pt_single_1_alias: " - niah_pt_single_1"
niah_pt_single_2_1024: 0.602
niah_pt_single_2_1024_stderr: 0.021912377885779953
niah_pt_single_2_2048: 0.466
niah_pt_single_2_2048_stderr: 0.022331264423258324
niah_pt_single_2_4096: 0.452
niah_pt_single_2_4096_stderr: N/A
niah_pt_single_2_alias: " - niah_pt_single_2"
niah_pt_single_3_1024: 0.496
niah_pt_single_3_1024_stderr: 0.022382357781962105
niah_pt_single_3_2048: 0.472
niah_pt_single_3_2048_stderr: 0.022347949832668024
niah_pt_single_3_4096: 0.43
niah_pt_single_3_4096_stderr: N/A
niah_pt_single_3_alias: " - niah_pt_single_3"
ruler_pt_4096: 0.28358181818181816
ruler_pt_4096_stderr: N/A
ruler_pt_alias: ruler_pt
ruler_pt_cwe_1024: 0.3368
ruler_pt_cwe_1024_stderr: 0.010680808928106402
ruler_pt_cwe_2048: 0.18159999999999998
ruler_pt_cwe_2048_stderr: 0.009549096842162718
ruler_pt_cwe_4096: 0.186
ruler_pt_cwe_4096_stderr: N/A
ruler_pt_cwe_alias: " - ruler_pt_cwe"
ruler_pt_fwe_1024: 0.6753333333333332
ruler_pt_fwe_1024_stderr: 0.01312227747420385
ruler_pt_fwe_2048: 0.5393333333333332
ruler_pt_fwe_2048_stderr: 0.012449640261503029
ruler_pt_fwe_4096: 0.494
ruler_pt_fwe_4096_stderr: N/A
ruler_pt_fwe_alias: " - ruler_pt_fwe"
ruler_pt_vt_1024: 0.9276
ruler_pt_vt_1024_stderr: 0.01029798898592869
ruler_pt_vt_2048: 0.9256
ruler_pt_vt_2048_stderr: 0.009345799480829435
ruler_pt_vt_4096: 0.8623999999999999
ruler_pt_vt_4096_stderr: N/A
ruler_pt_vt_alias: " - ruler_pt_vt"

30
special_tokens_map.json Normal file
View File

@@ -0,0 +1,30 @@
{
"bos_token": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"eos_token": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"pad_token": {
"content": "<|pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
},
"unk_token": {
"content": "<|unk|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false
}
}

463711
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

397
tokenizer_config.json Normal file
View File

@@ -0,0 +1,397 @@
{
"add_bos_token": false,
"add_eos_token": false,
"add_prefix_space": null,
"added_tokens_decoder": {
"0": {
"content": "<|unk|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"1": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"2": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49109": {
"content": "<|pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49110": {
"content": "<tools>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49111": {
"content": "</tools>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49112": {
"content": "<tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49113": {
"content": "</tool_call>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49114": {
"content": "<tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49115": {
"content": "</tool_response>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49116": {
"content": "<think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49117": {
"content": "</think>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49118": {
"content": "<answer>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49119": {
"content": "</answer>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49120": {
"content": "<context>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49121": {
"content": "</context>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49122": {
"content": "<|fim_prefix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49123": {
"content": "<|fim_suffix|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49124": {
"content": "<|fim_middle|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49125": {
"content": "<|repo_name|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49126": {
"content": "<|image|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49127": {
"content": "<|image_pad|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49128": {
"content": "<|image_placeholder|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49129": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49130": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49131": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49132": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49133": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49134": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49135": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49136": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49137": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49138": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49139": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49140": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49141": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49142": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49143": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49144": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49145": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49146": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49147": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49148": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"49149": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49150": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
},
"49151": {
"content": " ",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": false
}
},
"bos_token": "<|im_start|>",
"bos_token_id": 1,
"clean_up_tokenization_spaces": false,
"eos_token": "<|im_end|>",
"eos_token_id": 2,
"extra_special_tokens": {},
"legacy": false,
"model_input_names": [
"input_ids",
"attention_mask"
],
"model_max_length": 4096,
"pad_token": "<|pad|>",
"pad_token_id": 49109,
"padding_side": "right",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "PreTrainedTokenizerFast",
"truncation_side": "right",
"unk_token": "<|unk|>",
"unk_token_id": 0,
"use_default_system_prompt": false
}

3
train_logs.parquet Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:d3c0968b7c7482ec0a6c83b649911e063ceef11d31b681cfdffe0a3ccdbb551d
size 2492246

97
training_config.yaml Normal file
View File

@@ -0,0 +1,97 @@
# Directory settings
checkpoint_dir: "/lustre/scratch/data/polyglot_datasets/portuguese/checkpoints/models/Tucano2-qwen-1.5B-Base"
train_dataset_dir:
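# Total: ~100B tokens. A path listed more than once counts once per listing toward the totals below.
# Web Text (~70B tokens, 70%): 12B + 2 x 28B + 2 x 0.1B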
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/3_10b # 12B (PT)
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/4 # 28B (PT)
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/4 # 28B (PT)
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/5 # 0.1B (PT)
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/5 # 0.1B (PT)
# Synthetic Text (~30B tokens, 30%): the same 10B set listed three times
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2_synth # 10B (PT)
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2_synth # 10B (PT)
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2_synth # 10B (PT)
val_dataset_dir: "/lustre/scratch/data/polyglot_datasets/portuguese/tokenized/validation"
dataset_type: "parquet"
cache_dir: "/lustre/mlnvme/data/nanotronics/.cache"
# Data loading settings
pin_memory: true
num_workers_for_dataloader: 16
shuffle_dataset: true
mask_eos_token: false
mask_pad_token: false
# Model architecture settings
vocab_size: 49152
num_hidden_layers: 28
num_attention_heads: 16
num_key_value_heads: 8
head_dim: 128
hidden_size: 2048
intermediate_size: 6144
max_position_embeddings: 4096
tie_word_embeddings: true
hidden_act: "silu"
output_hidden_states: false
attn_implementation: "flash_attention_2"
use_cache: false
no_rope_layer_interval: null
rope_theta: 1000000.0
rope_scale_factor: null
rms_norm_eps: 0.000001
# Training settings
total_batch_size: 1048576
micro_batch_size: 4
eval_micro_batch_size: 4
num_train_epochs: 1
warmup_steps: 200
max_learning_rate: 0.0001
min_learning_rate: 0.0
muon_learning_rate: 0.001
weight_decay: 0.1
beta1: 0.9
beta2: 0.95
eps: 0.00000001
lr_decay_type: "cosine"
use_sqrt: true
lr_decay_iters_coef: 1.
seed: 42
max_steps: 100000
max_grad_norm: 1.0
# Precision and optimization settings
torch_compile: false
mat_mul_precision: "highest"
tf32: true
bf16: true
gradient_checkpointing: false
use_liger_kernel: true
static_graph: false
# Hub settings
push_to_hub: false
hub_token: null
hub_model_id: null
# Tokenizer and Reference model
tokenizer_name_or_path: "/lustre/scratch/data/polyglot_datasets/portuguese/checkpoints/models/Tucano2-qwen-1.5B"
chat_template_path: null
reference_model: "/lustre/scratch/data/polyglot_datasets/portuguese/checkpoints/models/Tucano2-qwen-1.5B"
continual_pretraining: true
# Checkpoint settings
resume_from_checkpoint: null
checkpointing_steps: 2500
begin_new_stage: false
stage_name: "single_cosine"
# Miscellaneous settings
sanity_check: false
sanity_check_num_samples: 100000
wandb_token: null
wandb_id: "tucano2-qwen-1.5b"
wandb_project: "Polyglot"
wandb_desc: "Developing LLMs for low-resource languages"

3
val_logs.parquet Normal file
View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:376bd8998a1af2ef32f19df0842ae522de0066c2e9d4f4f0b16b0b3fb46c7afb
size 2240