初始化项目,由ModelHub XC社区提供模型
Model: Polygl0t/Tucano2-qwen-1.5B-Base Source: Original Platform
This commit is contained in:
59
.gitattributes
vendored
Normal file
59
.gitattributes
vendored
Normal file
@@ -0,0 +1,59 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
logo.png filter=lfs diff=lfs merge=lfs -text
|
||||
benchmarks_hard.png filter=lfs diff=lfs merge=lfs -text
|
||||
learning_curve.png filter=lfs diff=lfs merge=lfs -text
|
||||
benchmarks_easy.png filter=lfs diff=lfs merge=lfs -text
|
||||
gradient_norm.png filter=lfs diff=lfs merge=lfs -text
|
||||
npm_hard.png filter=lfs diff=lfs merge=lfs -text
|
||||
performance_vs_compute.png filter=lfs diff=lfs merge=lfs -text
|
||||
npm_easy.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/arc_challenge.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/before_and_after.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/belebele.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/bluex.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/calame.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/enem.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/global_piqa.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/gradient_norm.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/hellaswag.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/lambada.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/learning_curve.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/mmlu.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/npm_easy.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/npm_hard.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/oab.png filter=lfs diff=lfs merge=lfs -text
|
||||
.plots/performance_vs_compute.png filter=lfs diff=lfs merge=lfs -text
|
||||
3
.plots/arc_challenge.png
Normal file
3
.plots/arc_challenge.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a0515e52aa953710e40ad0d7c8afd53616c7d5e21baca77cb0a6e30b66c698ba
|
||||
size 214244
|
||||
3
.plots/before_and_after.png
Normal file
3
.plots/before_and_after.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9ef79a4e69550354b943b9afdcdba327e5955efd2936863bdb2cfdd76d55b469
|
||||
size 274768
|
||||
3
.plots/belebele.png
Normal file
3
.plots/belebele.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:81ca953fa216916004f8372239a1fd6a1ab35879708d1adb0fe693260e1637e0
|
||||
size 207554
|
||||
3
.plots/bluex.png
Normal file
3
.plots/bluex.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b8632030c0d198315dbe6a52b28fa212c6ca871723e9483a63ebe385b384519e
|
||||
size 216448
|
||||
3
.plots/calame.png
Normal file
3
.plots/calame.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:73e2c054134e9839615b56f37abf0a2e5b6ea23e3bfc38860a65243a27edbaf2
|
||||
size 184143
|
||||
3
.plots/enem.png
Normal file
3
.plots/enem.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9ffafb63234d8ae7e700fefd5aacfca05fb593dd6546fe1d15acd35f4d1aacef
|
||||
size 216780
|
||||
3
.plots/global_piqa.png
Normal file
3
.plots/global_piqa.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d2198b12ea2f1ec9eee7429c15292ed0eda41c0da7e20e6101c195a40bfb37f1
|
||||
size 208997
|
||||
3
.plots/gradient_norm.png
Normal file
3
.plots/gradient_norm.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bb8faf475fc449942a938d829b029a508778891308adfbcae4333317f8419302
|
||||
size 276097
|
||||
3
.plots/hellaswag.png
Normal file
3
.plots/hellaswag.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3e68abf1294563b9e19ca22fa9eaa2fe3121456232f280521e0d49fa1d3db29c
|
||||
size 187890
|
||||
3
.plots/lambada.png
Normal file
3
.plots/lambada.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:95688a0b987a681e040c92f2981d0e8d0376d6dd603ca3a9af6519508d25e09c
|
||||
size 190285
|
||||
3
.plots/learning_curve.png
Normal file
3
.plots/learning_curve.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6049a3d73f84e79cf08c669e6cc97a244c12e5b934e98a6c9201a4cc5b5f0ce3
|
||||
size 214493
|
||||
3
.plots/mmlu.png
Normal file
3
.plots/mmlu.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c4bd96a3b6717acaf30748607a49747a62faf4ae9d6261b1551fb6b4698b28d7
|
||||
size 184938
|
||||
3
.plots/npm_easy.png
Normal file
3
.plots/npm_easy.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4b4519ee5c082058513e4694fd94977100d165e941978871f913c4866ef52794
|
||||
size 209025
|
||||
3
.plots/npm_hard.png
Normal file
3
.plots/npm_hard.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5d05298ea6f888e4fe93549c05888030b3be66468f07265f46605105d07fd36d
|
||||
size 227584
|
||||
3
.plots/oab.png
Normal file
3
.plots/oab.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:00d55340f750e295bc1aac92ac529ee04d75b345a7e35a482836694f1c1f5b02
|
||||
size 210259
|
||||
3
.plots/performance_vs_compute.png
Normal file
3
.plots/performance_vs_compute.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c4a657d6d68d6f2f1fbf0d3750eac546a7ead054995a466497e0e31bfeedd49c
|
||||
size 949327
|
||||
495
README.md
Normal file
495
README.md
Normal file
@@ -0,0 +1,495 @@
|
||||
---
|
||||
language:
|
||||
- pt
|
||||
license: apache-2.0
|
||||
library_name: transformers
|
||||
tags:
|
||||
- text-generation-inference
|
||||
datasets:
|
||||
- Polygl0t/gigaverbo-v2
|
||||
- Polygl0t/gigaverbo-v2-synth
|
||||
metrics:
|
||||
- perplexity
|
||||
pipeline_tag: text-generation
|
||||
widget:
|
||||
- text: "A floresta da Amazônia é conhecida por sua"
|
||||
example_title: Exemplo
|
||||
- text: "Uma das coisas que Portugal, Angola, Brasil e Moçambique tem em comum é o"
|
||||
example_title: Exemplo
|
||||
- text: "O Carnaval do Rio de Janeiro é"
|
||||
example_title: Exemplo
|
||||
inference:
|
||||
parameters:
|
||||
repetition_penalty: 1.2
|
||||
temperature: 0.1
|
||||
top_k: 50
|
||||
top_p: 1.0
|
||||
max_new_tokens: 150
|
||||
co2_eq_emissions:
|
||||
emissions: 334000
|
||||
source: CodeCarbon
|
||||
training_type: pre-training
|
||||
geographical_location: Germany
|
||||
hardware_used: NVIDIA A100-SXM4-80GB
|
||||
model-index:
|
||||
- name: Tucano2-qwen-1.5B-Base
|
||||
results:
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: ARC Challenge
|
||||
type: Polygl0t/ARC-poly
|
||||
split: test
|
||||
args:
|
||||
num_few_shot: 5
|
||||
metrics:
|
||||
- type: acc_norm
|
||||
value: 48.21
|
||||
name: Acc-norm
|
||||
source:
|
||||
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
|
||||
name: arc_challenge_poly_pt
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: HellaSwag
|
||||
type: Polygl0t/HellaSwag-poly
|
||||
split: validation
|
||||
args:
|
||||
num_few_shot: 5
|
||||
metrics:
|
||||
- type: acc_norm
|
||||
value: 56.25
|
||||
name: Acc-norm
|
||||
source:
|
||||
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
|
||||
name: hellaswag_poly_pt
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: Calame
|
||||
type: Polygl0t/CALAME-PT
|
||||
split: test
|
||||
args:
|
||||
num_few_shot: 5
|
||||
metrics:
|
||||
- type: acc
|
||||
value: 59.06
|
||||
name: Acc
|
||||
source:
|
||||
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
|
||||
name: calame_poly_pt
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: Lambada
|
||||
type: Polygl0t/LAMBADA-poly
|
||||
split: test
|
||||
args:
|
||||
num_few_shot: 5
|
||||
metrics:
|
||||
- type: acc
|
||||
value: 54.2
|
||||
name: Acc
|
||||
source:
|
||||
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
|
||||
name: lambada_poly_pt
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: Global PIQA
|
||||
type: mrlbenchmarks/global-piqa-nonparallel
|
||||
split: test
|
||||
args:
|
||||
num_few_shot: 5
|
||||
metrics:
|
||||
- type: acc_norm
|
||||
value: 77
|
||||
name: Acc-norm
|
||||
source:
|
||||
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
|
||||
name: global_piqa_completions_por_latn_braz
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: MMLU
|
||||
type: Polygl0t/MMLU-poly
|
||||
split: test
|
||||
args:
|
||||
num_few_shot: 5
|
||||
metrics:
|
||||
- type: acc
|
||||
value: 54.04
|
||||
name: Acc
|
||||
source:
|
||||
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
|
||||
name: mmlu_poly_pt
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: BELEBELE
|
||||
type: facebook/belebele
|
||||
split: test
|
||||
args:
|
||||
num_few_shot: 5
|
||||
metrics:
|
||||
- type: acc_norm
|
||||
value: 74
|
||||
name: Acc-norm
|
||||
source:
|
||||
url: https://github.com/Polygl0t/lm-evaluation-harness/tree/polyglot_harness_portuguese
|
||||
name: belebele_por_Latn
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: BLUEX
|
||||
type: eduagarcia-temp/BLUEX_without_images
|
||||
split: train
|
||||
args:
|
||||
num_few_shot: 3
|
||||
metrics:
|
||||
- type: acc
|
||||
value: 55.91
|
||||
name: Acc
|
||||
source:
|
||||
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
|
||||
name: bluex
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: ENEM Challenge
|
||||
type: eduagarcia/enem_challenge
|
||||
split: train
|
||||
args:
|
||||
num_few_shot: 3
|
||||
metrics:
|
||||
- type: acc
|
||||
value: 68.72
|
||||
name: Acc
|
||||
source:
|
||||
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
|
||||
name: enem_challenge
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: OAB Exams
|
||||
type: eduagarcia/oab_exams
|
||||
split: train
|
||||
args:
|
||||
num_few_shot: 3
|
||||
metrics:
|
||||
- type: acc
|
||||
value: 48.29
|
||||
name: Acc
|
||||
source:
|
||||
url: https://github.com/eduagarcia/lm-evaluation-harness-pt
|
||||
name: oab_exams
|
||||
base_model: Qwen/Qwen3-1.7B-Base
|
||||
---
|
||||
|
||||
# Tucano2-qwen-1.5B-Base
|
||||
|
||||
<img src="./logo.png" alt="An illustration of a Tucano bird showing vibrant colors like yellow, orange, blue, green, and black." height="200">
|
||||
|
||||
## Model Summary
|
||||
|
||||
**[Tucano2-qwen-1.5B-Base](https://huggingface.co/Polygl0t/Tucano2-qwen-1.5B-Base)** is a decoder-only transformer continually pretrained from [Qwen3-1.7B-Base](https://huggingface.co/Qwen/Qwen3-1.7B-Base). Tucano2 is part of the [Polygl0t](https://huggingface.co/Polygl0t) initiative, which aims to advance language models for low-resource languages.
|
||||
|
||||
Tucano2-qwen-1.5B-Base shares the same tokenizer as **[Tucano2-0.6B-Base](https://huggingface.co/Polygl0t/Tucano2-0.6B-Base)**. Token embedding transplantation via _Orthogonal Matching Pursuit_ was used to adapt Qwen3-1.7B-Base to be more sensitive to the lexical, morphological, and orthographic properties of Portuguese.
|
||||
|
||||
The model was continually pretrained on approximately 100 billion tokens and achieves state-of-the-art performance across several benchmarks designed to evaluate Portuguese language models. **All data, source code, and recipes used to develop the Tucano2 series are open and fully reproducible.**
|
||||
|
||||
## Details
|
||||
|
||||
- **Architecture:** a Transformer-based model ([`qwen3`](https://huggingface.co/docs/transformers/main/en/model_doc/qwen3))
|
||||
- **Size:** 1,510,073,344 parameters
|
||||
- **Context length:** 4,096 tokens
|
||||
- **Dataset(s):**
|
||||
- [Polygl0t/gigaverbo-v2](https://huggingface.co/datasets/Polygl0t/gigaverbo-v2)
|
||||
- [Polygl0t/gigaverbo-v2-synth](https://huggingface.co/datasets/Polygl0t/gigaverbo-v2-synth)
|
||||
- **Language(s):** Portuguese
|
||||
- **Batch size:** 1,048,576 tokens
|
||||
- **Number of steps:** 100,000
|
||||
- **GPU:** 8 NVIDIA A100-SXM4-80GB
|
||||
- **Training time:** ~215 hours
|
||||
- **Emissions:** 334 KgCO2 (Germany)
|
||||
- **Total energy consumption:** 878 kWh
|
||||
|
||||
The [source code](https://github.com/Polygl0t/llm-foundry) used to train this model is available on GitHub. The full configuration used for training is available in the following config file:
|
||||
|
||||
- Single stage (linear warmup with cosine decay): [training_config.yaml](training_config.yaml)
|
||||
|
||||
### Checkpoints
|
||||
|
||||
Checkpoints were saved every 2,500 steps, which equates to approximately 2.5 billion tokens. The main branch of this repository contains the final checkpoint saved at step 100000. All other checkpoints are available as separate branches. To load a specific checkpoint, you can use the following code snippet:
|
||||
|
||||
```python
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
|
||||
model_id = "Polygl0t/Tucano2-qwen-1.5B-Base"
|
||||
revision = "step-2500" # Change this to the desired checkpoint branch
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id, revision=revision)
|
||||
```
|
||||
|
||||
Alternatively, you can list all available revisions of the model with the following code snippet:
|
||||
|
||||
```python
|
||||
from huggingface_hub import list_repo_refs
|
||||
out = list_repo_refs("Polygl0t/Tucano2-qwen-1.5B-Base")
|
||||
branches = [b.name for b in out.branches]
|
||||
print(branches)
|
||||
```
|
||||
|
||||
<details>
|
||||
<summary><b>Learning Curves</b></summary>
|
||||
|
||||

|
||||
|
||||
This plot illustrates the evolution of model performance (measured by loss) as a function of training time, measured in tokens seen during training.
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Gradient Norms (L2)</b></summary>
|
||||
|
||||

|
||||
|
||||
This plot illustrates the evolution of gradient norms as a function of training time, measured in tokens seen during training.
|
||||
|
||||
</details>
|
||||
|
||||
## Intended Uses
|
||||
|
||||
The primary intended use of Tucano2-qwen-1.5B-Base is to serve as a foundation for research and development involving Portuguese language modeling. Checkpoints saved during training are designed to provide a controlled setting for performing comparative experiments, specifically regarding the effects of continual pretraining on the performance of currently available benchmarks. You may also fine-tune and adapt Tucano2-qwen-1.5B-Base for deployment if your use follows the Apache 2.0 license. If you decide to use Tucano2-qwen-1.5B-Base as a basis for your fine-tuned model, please conduct your own risk and bias assessment.
|
||||
|
||||
## Out-of-scope Use
|
||||
|
||||
- Tucano2-qwen-1.5B-Base is **not intended for deployment**. It is not an out-of-the-box product and should not be used for human-facing interactions.
|
||||
- Tucano2-qwen-1.5B-Base is for **the Portuguese language only** and is unsuitable for text generation tasks in other languages.
|
||||
- Tucano2-qwen-1.5B-Base has **not been fine-tuned** for downstream tasks.
|
||||
|
||||
## Basic usage
|
||||
|
||||
```python
|
||||
from transformers import GenerationConfig, TextGenerationPipeline, AutoTokenizer, AutoModelForCausalLM
|
||||
import torch
|
||||
|
||||
# Specify the model and tokenizer
|
||||
model_id = "Polygl0t/Tucano2-qwen-1.5B-Base"
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id)
|
||||
|
||||
# Specify the generation parameters as you like
|
||||
generation_config = GenerationConfig(
|
||||
**{
|
||||
"do_sample": True,
|
||||
"max_new_tokens": 150,
|
||||
"renormalize_logits": True,
|
||||
"repetition_penalty": 1.2,
|
||||
"temperature": 0.1,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"use_cache": True,
|
||||
}
|
||||
)
|
||||
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
generator = TextGenerationPipeline(model=model, task="text-generation", tokenizer=tokenizer, device=device)
|
||||
|
||||
# Generate text
|
||||
prompt = "# A floresta da Amazônia: um lugar de Magia\n\n"
|
||||
completion = generator(prompt, generation_config=generation_config)
|
||||
print(completion[0]['generated_text'])
|
||||
```
|
||||
|
||||
## Limitations
|
||||
|
||||
Like almost all other language models trained on large text datasets scraped from the web, Tucano2-qwen-1.5B-Base exhibits behavior that does not make it an out-of-the-box solution for many real-world applications, especially those requiring factual, reliable, and nontoxic text generation. Tucano2-qwen-1.5B-Base is subject to the following:
|
||||
|
||||
- **Hallucinations:** Tucano2-qwen-1.5B-Base can produce content that may be mistaken for fact but is misleading or entirely false, i.e., hallucinations.
|
||||
|
||||
- **Biases and Toxicity:** Tucano2-qwen-1.5B-Base inherits the social and historical stereotypes from the data used to train it. Given these biases, the model can produce toxic content, i.e., content that is harmful, offensive, or detrimental to individuals, groups, or communities.
|
||||
|
||||
- **Language Limitations:** Tucano2-qwen-1.5B-Base is primarily designed to interact with Portuguese. Other languages might challenge its comprehension, leading to potential misinterpretations or errors in response.
|
||||
|
||||
- **Repetition and Verbosity:** Tucano2-qwen-1.5B-Base may get stuck in repetition loops (especially if the repetition penalty during generation is set too low) or produce verbose responses unrelated to the prompt it was given.
|
||||
|
||||
Hence, even though Tucano2-qwen-1.5B-Base is released under a permissive license, we urge users to perform their own risk analysis before using it for real-world applications.
|
||||
|
||||
## Evaluations
|
||||
|
||||
The table below compares the Tucano2 series against other base models of similar size. We divide our evaluations into two sets:
|
||||
|
||||
- **Easy Set**: CALAME, GlobalPIQA, LAMBADA, ARC-Challenge, HellaSwag
|
||||
- **Hard Set**: ENEM, BLUEX, OAB Exams, BELEBELE, MMLU
|
||||
|
||||
The NPM (Normalized Performance Metric) provides a balanced view of model performance across tasks, accounting for each task's inherent difficulty by normalizing its evaluation score relative to its random baseline.
|
||||
|
||||
| | Total Avg. | Easy Set (NPM) | Hard Set (NPM) |
|
||||
| -------------------------- | ---------- | -------------- | -------------- |
|
||||
| **Tucano2-qwen-3.7B-Base** | 59.21 | 57.41 | 61 |
|
||||
| Qwen2.5-7B | 57.97 | 54.12 | 61.83 |
|
||||
| Qwen3-4B-Base | 57.86 | 52.52 | 63.2 |
|
||||
| SmolLM3-3B-Base | 50.25 | 54.06 | 46.44 |
|
||||
| Qwen2.5-3B | 50.16 | 47.69 | 52.62 |
|
||||
| **Tucano2-qwen-1.5B-Base** | 47.9 | 47.97 | 47.82 |
|
||||
| Curio-edu-7b | 45.66 | 57.46 | 33.87 |
|
||||
| Qwen3-1.7B-Base | 44.48 | 40.94 | 48.03 |
|
||||
| Curio-7b | 42.79 | 58.97 | 26.6 |
|
||||
| Llama-3.2-3B | 40.5 | 43.79 | 37.21 |
|
||||
| granite-3.3-2b-base | 39.97 | 45.31 | 34.63 |
|
||||
| **Tucano2-qwen-0.5B-Base** | 35.36 | 39.93 | 30.79 |
|
||||
| Qwen3-0.6B-Base | 29.4 | 26.41 | 32.38 |
|
||||
| Llama-2-7b-hf | 29.36 | 42.69 | 16.03 |
|
||||
| **Tucano2-0.6B-Base** | 20.64 | 40.28 | 0.99 |
|
||||
| Qwen2.5-0.5B | 19.89 | 18.7 | 21.09 |
|
||||
| Curio-1.1b | 19.23 | 39.16 | -0.69 |
|
||||
| Tucano-2b4 | 17.88 | 33.55 | 2.2 |
|
||||
| Curio-edu-1b1 | 17.72 | 34.77 | 0.67 |
|
||||
| Llama-3.2-1B | 16.57 | 28.32 | 4.83 |
|
||||
| Tucano-1b1 | 15.44 | 29.12 | 1.76 |
|
||||
| Tucano-630m | 14.9 | 26.99 | 2.8 |
|
||||
| Carvalho_pt-gl-1.3B | 12.54 | 26.75 | -1.66 |
|
||||
| TeenyTinyLlama-460m | 11.18 | 19.65 | 2.72 |
|
||||
| Tucano-160m | 8.78 | 19.12 | -1.56 |
|
||||
| TeenyTinyLlama-160m | 7.72 | 15.75 | -0.31 |
|
||||
| GlorIA-1.3B | 5.93 | 27.27 | -15.42 |
|
||||
|
||||
<details>
|
||||
<summary><b>Evaluation Suite</b></summary>
|
||||
|
||||
| **Benchmark** | **n-shot** | **Type** | **Baseline** | **Metric** |
|
||||
| --------------- | ---------- | ------------------ | ------------ | ---------- |
|
||||
| **Easy Set** | | | | |
|
||||
| CALAME | 5-shot | Completion | 0 | `acc` |
|
||||
| GlobalPIQA | 5-shot | Completion | 50 | `acc_norm` |
|
||||
| LAMBADA | 5-shot | Completion | 0 | `acc` |
|
||||
| ARC-Challenge | 5-shot | MC-Q&A | 25 | `acc_norm` |
|
||||
| HellaSwag | 5-shot | Completion | 25 | `acc_norm` |
|
||||
| **Hard Set** | | | | |
|
||||
| ENEM | 3-shot | MC-Q&A | 20 | `acc` |
|
||||
| BLUEX | 3-shot | MC-Q&A | 22.5 | `acc` |
|
||||
| OAB Exams | 3-shot | MC-Q&A | 25 | `acc` |
|
||||
| BELEBELE | 5-shot | MC-Q&A | 25 | `acc_norm` |
|
||||
| MMLU | 5-shot | MC-Q&A | 25 | `acc` |
|
||||
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary><b>Individual Benchmarks</b></summary>
|
||||
|
||||
| | BLUEX | ENEM | OAB | ARC Challenge | BELEBELE | CALAME | Global PIQA | HellaSwag | LAMBADA | MMLU |
|
||||
| -------------------------- | ----- | ----- | ----- | ------------- | -------- | ------ | ----------- | --------- | ------- | ----- |
|
||||
| **Tucano2-qwen-3.7B-Base** | 66.2 | 77.54 | 58.45 | 57.78 | 83.67 | 61.08 | 83 | 65.32 | 62.53 | 65.4 |
|
||||
| Qwen2.5-7B | 65.92 | 75.02 | 55.03 | 54.19 | 89.67 | 58.96 | 78 | 67.92 | 59.52 | 68.55 |
|
||||
| Qwen3-4B-Base | 69.96 | 77.61 | 55.58 | 54.53 | 87.89 | 57.95 | 77 | 63.19 | 60.37 | 68.59 |
|
||||
| SmolLM3-3B-Base | 54.52 | 61.37 | 45.51 | 51.37 | 77.67 | 59.15 | 81 | 65.57 | 59.89 | 56.19 |
|
||||
| Qwen2.5-3B | 58.28 | 67.32 | 50.34 | 45.21 | 83.22 | 58.38 | 75 | 59.44 | 57.17 | 59.79 |
|
||||
| **Tucano2-qwen-1.5B-Base** | 55.91 | 68.72 | 48.29 | 48.21 | 74 | 59.06 | 77 | 56.25 | 54.2 | 54.04 |
|
||||
| Curio-edu-7b | 47.15 | 58.64 | 43.78 | 50.94 | 53 | 60.79 | 86 | 66.48 | 64.62 | 45.14 |
|
||||
| Qwen3-1.7B-Base | 57.16 | 65.22 | 45.79 | 47.18 | 77.89 | 53.56 | 67 | 52.55 | 50.81 | 55.49 |
|
||||
| Curio-7b | 43.39 | 50.59 | 39.68 | 48.03 | 45.33 | 63.44 | 89 | 67.58 | 65.94 | 40.83 |
|
||||
| Llama-3.2-3B | 50.35 | 53.04 | 39.45 | 41.11 | 68.89 | 54.48 | 69 | 59.14 | 59.48 | 48.28 |
|
||||
| granite-3.3-2b-base | 45.34 | 54.02 | 39.54 | 41.37 | 65.67 | 58.77 | 70 | 60.81 | 58.22 | 45.63 |
|
||||
| **Tucano2-qwen-0.5B-Base** | 46.87 | 55.14 | 40.36 | 37.44 | 53.89 | 58.67 | 74 | 48.43 | 45.14 | 39.68 |
|
||||
| Qwen3-0.6B-Base | 42.98 | 49.48 | 40.46 | 36.92 | 65 | 45.95 | 54 | 40.33 | 41.78 | 43.54 |
|
||||
| Llama-2-7b-hf | 31.29 | 31.77 | 35.49 | 42.14 | 41.44 | 54.53 | 67 | 56.76 | 59.73 | 38.64 |
|
||||
| **Tucano2-0.6B-Base** | 21.14 | 23.58 | 23.28 | 37.01 | 26.22 | 57.61 | 79 | 47.74 | 39.45 | 27.18 |
|
||||
| Qwen2.5-0.5B | 32.55 | 38.91 | 35.9 | 28.46 | 49.56 | 44.89 | 44 | 37.7 | 39.08 | 41.17 |
|
||||
| Curio-1.1b | 21.56 | 21.06 | 23.1 | 30.43 | 22.89 | 59.25 | 75 | 49.45 | 46.69 | 26.35 |
|
||||
| Tucano-2b4 | 25.45 | 21.62 | 26.74 | 30.43 | 25.89 | 50.34 | 73 | 48.85 | 32.39 | 26.24 |
|
||||
| Curio-edu-1b1 | 23.5 | 19.87 | 25.01 | 32.22 | 26.22 | 54.91 | 69 | 46.3 | 42.93 | 25.43 |
|
||||
| Llama-3.2-1B | 24.06 | 23.93 | 26.06 | 31.71 | 33.33 | 50 | 55 | 45.27 | 45.6 | 28.51 |
|
||||
| Tucano-1b1 | 25.45 | 21.55 | 26.38 | 30.09 | 25.67 | 48.94 | 68 | 44.1 | 28.43 | 25.26 |
|
||||
| Tucano-630m | 26.7 | 21.69 | 26.92 | 28.72 | 27.33 | 47.3 | 68 | 40.37 | 26.2 | 25.6 |
|
||||
| Carvalho_pt-gl-1.3B | 19.33 | 18.12 | 22.32 | 27.01 | 26.44 | 53.42 | 63 | 38.53 | 33.59 | 24.82 |
|
||||
| TeenyTinyLlama-460m | 25.87 | 20.15 | 27.02 | 27.35 | 28.11 | 42.49 | 59 | 34.81 | 21.56 | 26.65 |
|
||||
| Tucano-160m | 24.76 | 20.57 | 17.22 | 25.56 | 23.44 | 43.59 | 59 | 33.73 | 21.64 | 25.77 |
|
||||
| TeenyTinyLlama-160m | 22.53 | 18.89 | 22.32 | 24.02 | 26.78 | 39.79 | 58 | 29.89 | 17.74 | 25.74 |
|
||||
| GlorIA-1.3B | 4.31 | 2.52 | 4.69 | 26.41 | 22.78 | 54.67 | 64 | 36.35 | 36.68 | 23.69 |
|
||||
|
||||
</details>
|
||||
|
||||
## Performance and Compute
|
||||
|
||||
Below, we display the performance of Tucano2-qwen-1.5B-Base across all benchmarks in our evaluation suite. Tucano2-qwen-1.5B-Base is compared with Qwen3-1.7B-Base, the base model from which it was continually pretrained. The percentage variation in performance is displayed in terms of the difference in evaluation scores between the Base and the Continually Pretrained model.
|
||||
|
||||
All individual benchmark scores and their evolution across training time can be found in the [.plots](https://huggingface.co/Polygl0t/Tucano2-qwen-1.5B-Base/tree/main/.plots/) folder.
|
||||
|
||||
**Before and After Continual Pretraining**
|
||||
|
||||

|
||||
|
||||
This plot compares the compute requirements (measured as C = 6 \* N \* D, where N is the number of parameters and D is the number of tokens processed) against the performance of each model (measured by the NPM score).
|
||||
|
||||

|
||||
|
||||
<details>
|
||||
<summary><b>Performance and Compute Details</b></summary>
|
||||
|
||||
| | Parameters (B) | Pretraining Tokens (B) | Continual Pretraining Tokens (B) | Total Tokens (B) | Pretraining Compute (FLOPs) | Continual Pretraining Compute (FLOPs) | Total Compute (FLOPs) | NPM Score |
|
||||
|----------------------------|----------------|------------------------|----------------------------------|------------------|-----------------------------|---------------------------------------|-----------------------|-----------|
|
||||
| **Tucano2-qwen-3.7B-Base** | 3.7 | 36000 | 50 | 36050 | 8.64e+23 | 1.11e+21 | 8.65e+23 | 59.2 |
|
||||
| Qwen2.5-7B | 7 | 18000 | - | 18000 | 7.56e+23 | - | 7.56e+23 | 57.97 |
|
||||
| Qwen3-4B-Base | 4 | 36000 | - | 36000 | 8.64e+23 | - | 8.64e+23 | 57.86 |
|
||||
| SmolLM3-3B-Base | 3 | 11200 | - | 11200 | 2.02e+23 | - | 2.02e+23 | 50.25 |
|
||||
| Qwen2.5-3B | 3 | 18000 | - | 18000 | 3.24e+23 | - | 3.24e+23 | 50.15 |
|
||||
| **Tucano2-qwen-1.5B-Base** | 1.5 | 36000 | 100 | 36100 | 3.67e+23 | 9e+20 | 3.68e+23 | 47.89 |
|
||||
| Curio-edu-7b | 7 | 2000 | 20 | 2020 | 8.4e+22 | 8.4e+20 | 8.48e+22 | 45.66 |
|
||||
| Qwen3-1.7B-Base | 1.7 | 36000 | - | 36000 | 3.67e+23 | - | 3.67e+23 | 44.48 |
|
||||
| Curio-7b | 7 | 2000 | 150 | 2150 | 8.4e+22 | 6.3e+21 | 9.03e+22 | 42.78 |
|
||||
| Llama-3.2-3B | 3 | 9000 | - | 9000 | 1.62e+23 | - | 1.62e+23 | 40.5 |
|
||||
| granite-3.3-2b-base | 2 | 12000 | - | 12000 | 1.44e+23 | - | 1.44e+23 | 39.96 |
|
||||
| **Tucano2-qwen-0.5B-Base** | 0.5 | 36000 | 50 | 36050 | 1.3e+23 | 1.5e+20 | 1.3e+23 | 35.35 |
|
||||
| Qwen3-0.6B-Base | 0.6 | 36000 | - | 36000 | 1.3e+23 | - | 1.3e+23 | 29.39 |
|
||||
| Llama-2-7b-hf | 7 | 2000 | - | 2000 | 8.4e+22 | - | 8.4e+22 | 29.36 |
|
||||
| **Tucano2-0.6B-Base** | 0.6 | 408 | - | 408 | 1.47e+21 | - | 1.47e+21 | 20.63 |
|
||||
| Qwen2.5-0.5B | 0.5 | 18000 | - | 18000 | 5.4e+22 | - | 5.4e+22 | 19.89 |
|
||||
| Curio-1.1b | 1.1 | 1000 | 150 | 1150 | 6.6e+21 | 9.9e+20 | 7.59e+21 | 19.23 |
|
||||
| Tucano-2b4 | 2.4 | 515 | - | 515 | 7.42e+21 | - | 7.42e+21 | 17.87 |
|
||||
| Curio-edu-1b1 | 1.1 | 1000 | 20 | 1020 | 6.6e+21 | 1.32e+20 | 6.73e+21 | 17.72 |
|
||||
| Llama-3.2-1B | 1 | 9000 | - | 9000 | 5.4e+22 | - | 5.4e+22 | 16.57 |
|
||||
| Tucano-1b1 | 1.1 | 250 | - | 250 | 1.65e+21 | - | 1.65e+21 | 15.44 |
|
||||
| Tucano-630m | 0.63 | 211 | - | 211 | 7.98e+20 | - | 7.98e+20 | 14.89 |
|
||||
| Carvalho_pt-gl-1.3B | 1.3 | 26 | 5 | 31 | 2.03e+20 | 3.9e+19 | 2.42e+20 | 12.54 |
|
||||
| TeenyTinyLlama-460m | 0.46 | 6.2 | - | 6.2 | 1.71e+19 | - | 1.71e+19 | 11.18 |
|
||||
| Tucano-160m | 0.16 | 169 | - | 169 | 1.62e+20 | - | 1.62e+20 | 8.78 |
|
||||
| TeenyTinyLlama-160m | 0.16 | 6.2 | - | 6.2 | 5.95e+18 | - | 5.95e+18 | 7.71 |
|
||||
| GlorIA-1.3B | 1.3 | 35 | - | 35 | 2.73e+20 | - | 2.73e+20 | 5.92 |
|
||||
|
||||
</details>
|
||||
|
||||
## Cite as 🤗
|
||||
|
||||
```bibtex
|
||||
@misc{correa2026tucano2cool,
|
||||
title={{Tucano 2 Cool: Better Open Source LLMs for Portuguese}},
|
||||
author={Nicholas Kluge Corr{\^e}a and Aniket Sen and Shiza Fatimah and Sophia Falk and Lennard Landgraf and Julia Kastner and Lucie Flek},
|
||||
year={2026},
|
||||
eprint={2603.03543},
|
||||
archivePrefix={arXiv},
|
||||
primaryClass={cs.CL},
|
||||
url={https://arxiv.org/abs/2603.03543},
|
||||
}
|
||||
```
|
||||
|
||||
## Acknowledgments
|
||||
|
||||
Polyglot is a project funded by the Federal Ministry of Education and Research (BMBF) and the Ministry of Culture and Science of the State of North Rhine-Westphalia (MWK) as part of TRA Sustainable Futures (University of Bonn) and the Excellence Strategy of the federal and state governments.
|
||||
|
||||
We also gratefully acknowledge the granted access to the [Marvin cluster](https://www.hpc.uni-bonn.de/en/systems/marvin) hosted by [University of Bonn](https://www.uni-bonn.de/en) along with the support provided by its High Performance Computing & Analytics Lab.
|
||||
|
||||
## License
|
||||
|
||||
Tucano2-qwen-1.5B-Base is licensed under the Apache License, Version 2.0. For more details, see the [LICENSE](LICENSE) file.
|
||||
62
config.json
Normal file
62
config.json
Normal file
@@ -0,0 +1,62 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 1,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 2,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 2048,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 6144,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 4096,
|
||||
"max_window_layers": 28,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 16,
|
||||
"num_hidden_layers": 28,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 49109,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": true,
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.53.2",
|
||||
"use_cache": true,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 49152
|
||||
}
|
||||
43
emissions.csv
Normal file
43
emissions.csv
Normal file
@@ -0,0 +1,43 @@
|
||||
timestamp,project_name,run_id,experiment_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
|
||||
2026-01-04T00:53:29,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,1672.0128981360467,0.3616482249474259,0.0002162951167126,45.019915875,484.7646790573522,70.0,0.0201888648940545,0.897753190146414,0.0313904687747983,0.9493325238152668,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-04T05:49:41,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,19444.25734276499,4.2291008795863885,0.0002174987095179,45.019199025000006,482.7655568575297,70.0,0.2348000245067472,10.501583710704685,0.3650754578228039,11.101459193034229,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-04T11:11:30,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,38753.11218811601,8.43492817140923,0.0002176580846065,45.01831363714286,484.5844171455976,70.0,0.4679794979088908,20.94621371362422,0.7276316248212911,22.14182483635445,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-04T16:33:27,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,58069.61899157299,12.637987434252608,0.0002176351016886,45.01415124000002,484.63982451788416,70.0,0.7012435854164667,31.38336156972471,1.0903192030491542,33.17492435819033,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-04T21:55:15,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,77377.71302408102,16.8434678120622,0.0002176785427455,45.020718261,472.78786459711847,70.0,0.934410551153674,41.82711327666385,1.452855518716594,44.214379346534194,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-05T03:17:01,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,96684.46855293802,21.04547828219133,0.0002176717584238,45.01916109,480.4906651352453,70.0,1.1675632860385523,52.26179285662305,1.8153696276265392,55.2447257702883,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-05T08:38:49,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,115991.63376416505,25.247856532391783,0.0002176696345507,45.022424445000006,487.6202874492108,70.0,1.400724185860845,62.69741657400424,2.177896862898236,66.27603762276357,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-05T14:00:35,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,135298.054422079,29.44756302094976,0.0002176495674437,45.01929460500001,486.7508296644243,70.0,1.633874360692557,73.12605403524107,2.5404076611594792,77.30033605709347,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-05T19:22:23,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,154606.302445931,33.650672001514444,0.0002176539472786,45.01864712571429,481.2120431259584,70.0,1.867044639775351,83.56357299774962,2.9029484512387627,88.33356608876346,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-06T00:44:09,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,173912.178886339,37.85256180611232,0.0002176533124275,45.01715610375,483.6554985866221,70.0,2.100189815705816,93.99795461913716,3.265451328675393,99.36359576351836,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-06T06:05:54,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,193217.17706548003,42.05097558628693,0.0002176358035291,45.01952937000001,484.7059326018373,70.0,2.33332110738652,104.42324762964276,3.627932080104468,110.38450081713329,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-06T11:27:47,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,212530.15787969803,46.25285688778893,0.0002176296171293,45.02398519038461,808.9535299055012,70.0,2.5665519780566304,114.85738787333752,3.9905683197285895,121.414508171122,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-06T16:49:35,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,231837.855038024,50.45379269585461,0.0002176253428827,0.0,535.4665927768112,70.0,2.7997165445120697,125.28892996300908,4.353387081808224,132.44203358932828,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-06T22:11:28,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,251150.89791434805,54.65462820015403,0.0002176166944017,45.01846470214287,485.30602055119977,70.0,3.0329467619813424,135.7203271267309,4.716021819789988,143.4692957085025,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-07T03:33:36,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,270479.23990876006,58.856623712839706,0.000217601261127,45.02421498375001,665.4842286309712,70.0,3.266363310468128,146.1542949405653,5.078944617688521,154.49960286872215,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-07T08:55:22,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,289785.181663043,63.05984593456516,0.0002176089390515,45.01917849461539,868.9230926440835,70.0,3.4995023270463355,156.5921901961529,5.441437637097453,165.53313016029705,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-07T14:17:09,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,309092.21288732707,67.25836504844831,0.0002175996749324,0.0,0.0,70.0,3.732656094991943,167.01742023355234,5.804235388080284,176.55431171662505,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-07T19:38:58,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,328400.79333176,71.46097276904095,0.0002176029236837,45.01939423500001,479.4653427247581,70.0,3.965831424019364,177.45360994638622,6.166784562239569,187.58622593264457,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-08T01:00:47,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,347709.7525634171,75.66309031421144,0.0002176041648426,45.019186976250005,481.31254917203177,70.0,4.199013129390241,187.8884982006786,6.529342100069741,198.61685343013897,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-08T06:22:36,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,367019.01668525406,79.86450018570841,0.0002176031664707,45.02422620000001,547.7264625280638,70.0,4.432194094217369,198.32152950654145,6.89189967187149,209.64562327263,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-08T11:44:30,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,386333.212516612,84.06487647912063,0.0002175968147587,45.02229371100001,483.01274136565706,70.0,4.665430583328959,208.7517054509535,7.254543921410093,220.67167995569136,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-08T17:06:16,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,405639.152044852,88.2633213389953,0.0002175907352484,45.021496023000005,483.6714764025161,70.0,4.89857015528936,219.1770576740061,7.617038764733076,231.69266659402888,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-08T22:28:05,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,424947.72908463504,92.46002132998896,0.0002175797515829,45.0169352775,478.339030320535,70.0,5.131743782461101,229.5977411369355,7.97958800353076,242.7090729229268,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-09T03:49:56,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,444259.0430381961,96.65982774527176,0.0002175753746828,45.02668143681819,634.4243733501488,70.0,5.3649481096869005,240.0265014163263,8.34218414053561,253.7336336665488,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-09T09:11:50,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,463572.824956201,100.86067552519698,0.0002175724505307,45.01943568000001,484.4552802111328,70.0,5.598176539522024,250.45793349952004,8.70481797040043,264.7609280094421,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-09T14:33:51,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,482894.124817878,105.0652181909104,0.000217574024597,45.017861946428575,482.8041853022498,70.0,5.831504167079657,260.8988112641604,9.067606056909732,275.7979214881491,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-09T19:55:46,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,502208.942610628,109.27042377206406,0.0002175796058191,45.0263263307143,908.9151128213846,70.0,6.06475756903236,271.3413464465148,9.430551115216456,286.8366551307627,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-10T01:17:37,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,521519.596273953,113.4744512965224,0.0002175842520726,45.02019154500001,481.5145843114581,70.0,6.297954221678273,281.78120735561833,9.79313477792758,297.8722963552235,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-10T06:39:24,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,540827.3727985891,117.67560737074152,0.0002175844147122,45.01754935875,482.4771736798942,70.0,6.531114639119297,292.2136213551536,10.155663981423242,308.9003999756964,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-10T12:01:12,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,560135.153515207,121.87618752281664,0.0002175835363268,45.01880951625,482.430054489862,70.0,6.764277945929993,302.6445168115299,10.5181970336314,319.9269917910924,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-10T17:23:02,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,579444.889759472,126.07697126437505,0.0002175823335273,45.01756854,507.0251537355392,70.0,6.997466515479824,313.0758827285595,10.88076878818248,330.9541180322222,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-10T22:44:50,Polyglot,18fae7ac-5cb7-4fcf-af97-7261add1c5b3,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,598752.613904556,130.27444202106778,0.0002175764063417,45.01916073409091,692.3847170751268,70.0,7.230630347631746,323.49861499646056,11.243302289631842,341.97254763372564,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-11T20:22:21,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,19694.33545038849,4.140420121964514,0.0002102340610778,45.02229774230769,715.6152593669873,70.0,0.2379094903649518,10.260946260417144,0.3698149985669284,10.868670749349032,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-12T01:41:11,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,38823.88746571168,8.259854018045177,0.0002127518534907,45.02043981,373.17769863629735,70.0,0.4689248080536968,20.48432003716533,0.7290074662264722,21.68225231144553,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-12T07:00:02,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,57955.124901412055,12.37992186382978,0.0002136122022839,45.01808452285714,385.4241548433552,70.0,0.699966693433385,30.709290225190443,1.088241083291844,32.49749800191568,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-12T12:18:56,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,77088.85968300328,16.49728420820384,0.0002140034795694,45.02558758846154,759.4654963242226,70.0,0.931039507515274,40.92707902608152,1.4475231731988447,43.30564170679575,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-12T17:37:53,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,96225.60899510048,20.61482729975355,0.0002142343136617,45.01869930000001,430.06255801322726,70.0,1.1621392468993312,51.145273313686005,1.8068473153918945,54.1142598759773,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-12T22:56:47,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,115360.29743119702,24.732333252545853,0.0002143920725178,45.025926885,380.7974883712888,70.0,1.3932172625401595,61.36342596820157,2.1661373245693256,64.92278055531133,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-13T04:15:45,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,134497.9721878525,28.847225996051005,0.0002144807503548,45.033311655000006,387.6305330294473,70.0,1.6243354261510627,71.57461952076096,2.525486570480943,75.72444151739337,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-13T09:34:46,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,153638.7588795405,32.96794933170222,0.0002145809402011,45.02050812000001,379.04621907408807,70.0,1.855488253191034,81.80102649354677,2.8848931327789025,86.54140787951758,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-13T14:53:34,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,172767.15908273123,37.08209324325416,0.0002146362389711,45.022813326000005,378.2701073055912,70.0,2.086489789305811,92.01055236393864,3.2440609921913874,97.34110314543685,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
2026-01-13T14:53:42,Polyglot,f2a62502-6236-4d25-95f8-079b2519fd37,5b0fa12a-3dd7-45bb-9766-cc326314d9f1,172775.305582162,37.08372771929324,0.0002146355788192101,45.01390842,1795.2327740678572,70.0,2.0865850073798553,92.01459960217636,3.2442090622980118,97.3453936718552,Germany,DEU,north rhine-westphalia,,,Linux-5.14.0-570.35.1.el9_6.x86_64-x86_64-with-glibc2.34,3.12.3,3.0.6,256,AMD EPYC 7713 64-Core Processor,4,4 x NVIDIA A100-SXM4-80GB,7.0932,50.7263,1950,machine,N,1.0
|
||||
|
190
evals.yaml
Normal file
190
evals.yaml
Normal file
@@ -0,0 +1,190 @@
|
||||
evaluations:
|
||||
arc_challenge_poly_pt_acc: 0.44529914529914527
|
||||
arc_challenge_poly_pt_acc_norm: 0.48205128205128206
|
||||
arc_challenge_poly_pt_acc_norm_stderr: 0.014614459118720773
|
||||
arc_challenge_poly_pt_acc_stderr: 0.014536106383401307
|
||||
arc_challenge_poly_pt_alias: arc_challenge_poly_pt
|
||||
assin2_rte_acc,all: 0.8766339869281046
|
||||
assin2_rte_acc_stderr,all: 0.004699176594010998
|
||||
assin2_rte_alias: assin2_rte
|
||||
assin2_rte_f1_macro,all: 0.8755544782450612
|
||||
assin2_rte_f1_macro_stderr,all: 0.004739218474976754
|
||||
assin2_sts_alias: assin2_sts
|
||||
assin2_sts_mse,all: 1.0735661764705884
|
||||
assin2_sts_mse_stderr,all: N/A
|
||||
assin2_sts_pearson,all: 0.6290850483582386
|
||||
assin2_sts_pearson_stderr,all: 0.009612669804680212
|
||||
assin_entailment_acc: 0.708
|
||||
assin_entailment_acc_stderr: 0.007190057317647597
|
||||
assin_entailment_alias: assin_entailment
|
||||
assin_paraphrase_acc: 0.72475
|
||||
assin_paraphrase_acc_stderr: 0.007062884004258771
|
||||
assin_paraphrase_alias: assin_paraphrase
|
||||
belebele_por_Latn_acc: 0.74
|
||||
belebele_por_Latn_acc_norm: 0.74
|
||||
belebele_por_Latn_acc_norm_stderr: 0.014629271097998421
|
||||
belebele_por_Latn_acc_stderr: 0.014629271097998421
|
||||
belebele_por_Latn_alias: belebele_por_Latn
|
||||
bluex_acc,all: 0.5591098748261474
|
||||
bluex_acc,exam_id__UNICAMP_2018: 0.5370370370370371
|
||||
bluex_acc,exam_id__UNICAMP_2019: 0.6
|
||||
bluex_acc,exam_id__UNICAMP_2020: 0.509090909090909
|
||||
bluex_acc,exam_id__UNICAMP_2021_1: 0.6304347826086957
|
||||
bluex_acc,exam_id__UNICAMP_2021_2: 0.47058823529411764
|
||||
bluex_acc,exam_id__UNICAMP_2022: 0.6923076923076923
|
||||
bluex_acc,exam_id__UNICAMP_2023: 0.6511627906976745
|
||||
bluex_acc,exam_id__UNICAMP_2024: 0.5555555555555556
|
||||
bluex_acc,exam_id__USP_2018: 0.42592592592592593
|
||||
bluex_acc,exam_id__USP_2019: 0.4
|
||||
bluex_acc,exam_id__USP_2020: 0.5535714285714286
|
||||
bluex_acc,exam_id__USP_2021: 0.6346153846153846
|
||||
bluex_acc,exam_id__USP_2022: 0.4897959183673469
|
||||
bluex_acc,exam_id__USP_2023: 0.6136363636363636
|
||||
bluex_acc,exam_id__USP_2024: 0.6829268292682927
|
||||
bluex_acc_stderr,all: 0.01069785624296974
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2018: 0.039296745462938605
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2019: 0.04014798243504816
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2020: 0.03888891915912078
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2021_1: 0.0411282805992433
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2021_2: 0.04024244267609041
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2022: 0.04269098796102326
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2023: 0.041929332285094205
|
||||
bluex_acc_stderr,exam_id__UNICAMP_2024: 0.04271556020713639
|
||||
bluex_acc_stderr,exam_id__USP_2018: 0.038960456443585575
|
||||
bluex_acc_stderr,exam_id__USP_2019: 0.04470992542423865
|
||||
bluex_acc_stderr,exam_id__USP_2020: 0.03835558472845869
|
||||
bluex_acc_stderr,exam_id__USP_2021: 0.03851223021094464
|
||||
bluex_acc_stderr,exam_id__USP_2022: 0.0410194387799713
|
||||
bluex_acc_stderr,exam_id__USP_2023: 0.04234932088737962
|
||||
bluex_acc_stderr,exam_id__USP_2024: 0.0418113153523233
|
||||
bluex_alias: bluex
|
||||
calame_pt_acc: 0.5905587668593449
|
||||
calame_pt_acc_stderr: 0.010794891914388602
|
||||
calame_pt_alias: calame_pt
|
||||
calame_pt_perplexity: 7.008747913313241
|
||||
calame_pt_perplexity_stderr: 0.40940358093832135
|
||||
enem_challenge_acc,all: 0.6871938418474458
|
||||
enem_challenge_acc,exam_id__2009: 0.6782608695652174
|
||||
enem_challenge_acc,exam_id__2010: 0.717948717948718
|
||||
enem_challenge_acc,exam_id__2011: 0.7521367521367521
|
||||
enem_challenge_acc,exam_id__2012: 0.7068965517241379
|
||||
enem_challenge_acc,exam_id__2013: 0.6666666666666666
|
||||
enem_challenge_acc,exam_id__2014: 0.6972477064220184
|
||||
enem_challenge_acc,exam_id__2015: 0.7058823529411765
|
||||
enem_challenge_acc,exam_id__2016: 0.6611570247933884
|
||||
enem_challenge_acc,exam_id__2016_2: 0.6422764227642277
|
||||
enem_challenge_acc,exam_id__2017: 0.6896551724137931
|
||||
enem_challenge_acc,exam_id__2022: 0.631578947368421
|
||||
enem_challenge_acc,exam_id__2023: 0.7037037037037037
|
||||
enem_challenge_acc_stderr,all: 0.0070891143834158395
|
||||
enem_challenge_acc_stderr,exam_id__2009: 0.0251403029631727
|
||||
enem_challenge_acc_stderr,exam_id__2010: 0.02405435432253117
|
||||
enem_challenge_acc_stderr,exam_id__2011: 0.023038334357693698
|
||||
enem_challenge_acc_stderr,exam_id__2012: 0.02443296265724745
|
||||
enem_challenge_acc_stderr,exam_id__2013: 0.02625818903872996
|
||||
enem_challenge_acc_stderr,exam_id__2014: 0.02543475203567573
|
||||
enem_challenge_acc_stderr,exam_id__2015: 0.0241011316238719
|
||||
enem_challenge_acc_stderr,exam_id__2016: 0.02479881231135445
|
||||
enem_challenge_acc_stderr,exam_id__2016_2: 0.024985945100694615
|
||||
enem_challenge_acc_stderr,exam_id__2017: 0.024741511708920926
|
||||
enem_challenge_acc_stderr,exam_id__2022: 0.024112138519174948
|
||||
enem_challenge_acc_stderr,exam_id__2023: 0.022685440228473772
|
||||
enem_challenge_alias: enem
|
||||
faquad_nli_acc,all: 0.7846153846153846
|
||||
faquad_nli_acc_stderr,all: 0.011396120309131366
|
||||
faquad_nli_alias: faquad_nli
|
||||
faquad_nli_f1_macro,all: 0.4396551724137931
|
||||
faquad_nli_f1_macro_stderr,all: 0.00357969847290883
|
||||
global_piqa_completions_por_latn_braz_acc: 0.8
|
||||
global_piqa_completions_por_latn_braz_acc_bytes: 0.77
|
||||
global_piqa_completions_por_latn_braz_acc_bytes_stderr: 0.042295258468165065
|
||||
global_piqa_completions_por_latn_braz_acc_norm: 0.77
|
||||
global_piqa_completions_por_latn_braz_acc_norm_stderr: 0.042295258468165065
|
||||
global_piqa_completions_por_latn_braz_acc_stderr: 0.04020151261036849
|
||||
global_piqa_completions_por_latn_braz_alias: global_piqa_completions_por_latn_braz
|
||||
hatebr_offensive_acc,all: 0.8064285714285714
|
||||
hatebr_offensive_acc_stderr,all: 0.0074826455677965455
|
||||
hatebr_offensive_alias: hatebr_offensive_binary
|
||||
hatebr_offensive_f1_macro,all: 0.801107069296415
|
||||
hatebr_offensive_f1_macro_stderr,all: 0.007665138669900729
|
||||
hellaswag_poly_pt_acc: 0.42539820132192
|
||||
hellaswag_poly_pt_acc_norm: 0.5624661393433742
|
||||
hellaswag_poly_pt_acc_norm_stderr: 0.005164166461307016
|
||||
hellaswag_poly_pt_acc_stderr: 0.005146684217488626
|
||||
hellaswag_poly_pt_alias: hellaswag_poly_pt
|
||||
lambada_poly_pt_acc: 0.5420143605666602
|
||||
lambada_poly_pt_acc_stderr: 0.006941341313928234
|
||||
lambada_poly_pt_alias: lambada_poly_pt
|
||||
lambada_poly_pt_perplexity: 9.820716308685725
|
||||
lambada_poly_pt_perplexity_stderr: 0.3120846033602529
|
||||
mmlu_poly_pt_acc: 0.5403782647853498
|
||||
mmlu_poly_pt_acc_stderr: 0.004317657624183865
|
||||
mmlu_poly_pt_alias: mmlu_poly_pt
|
||||
oab_exams_acc,all: 0.48291571753986334
|
||||
oab_exams_acc,exam_id__2010-01: 0.4588235294117647
|
||||
oab_exams_acc,exam_id__2010-02: 0.51
|
||||
oab_exams_acc,exam_id__2011-03: 0.46464646464646464
|
||||
oab_exams_acc,exam_id__2011-04: 0.45
|
||||
oab_exams_acc,exam_id__2011-05: 0.5
|
||||
oab_exams_acc,exam_id__2012-06: 0.4625
|
||||
oab_exams_acc,exam_id__2012-06a: 0.525
|
||||
oab_exams_acc,exam_id__2012-07: 0.5
|
||||
oab_exams_acc,exam_id__2012-08: 0.4625
|
||||
oab_exams_acc,exam_id__2012-09: 0.33766233766233766
|
||||
oab_exams_acc,exam_id__2013-10: 0.525
|
||||
oab_exams_acc,exam_id__2013-11: 0.525
|
||||
oab_exams_acc,exam_id__2013-12: 0.525
|
||||
oab_exams_acc,exam_id__2014-13: 0.475
|
||||
oab_exams_acc,exam_id__2014-14: 0.5375
|
||||
oab_exams_acc,exam_id__2014-15: 0.5641025641025641
|
||||
oab_exams_acc,exam_id__2015-16: 0.5375
|
||||
oab_exams_acc,exam_id__2015-17: 0.5384615384615384
|
||||
oab_exams_acc,exam_id__2015-18: 0.4625
|
||||
oab_exams_acc,exam_id__2016-19: 0.48717948717948717
|
||||
oab_exams_acc,exam_id__2016-20: 0.45
|
||||
oab_exams_acc,exam_id__2016-20a: 0.425
|
||||
oab_exams_acc,exam_id__2016-21: 0.4625
|
||||
oab_exams_acc,exam_id__2017-22: 0.45
|
||||
oab_exams_acc,exam_id__2017-23: 0.45
|
||||
oab_exams_acc,exam_id__2017-24: 0.5
|
||||
oab_exams_acc,exam_id__2018-25: 0.45
|
||||
oab_exams_acc_stderr,all: 0.006164493571290463
|
||||
oab_exams_acc_stderr,exam_id__2010-01: 0.03120711424338333
|
||||
oab_exams_acc_stderr,exam_id__2010-02: 0.028912621193308535
|
||||
oab_exams_acc_stderr,exam_id__2011-03: 0.028826912523627856
|
||||
oab_exams_acc_stderr,exam_id__2011-04: 0.03204801747078995
|
||||
oab_exams_acc_stderr,exam_id__2011-05: 0.03224202969176272
|
||||
oab_exams_acc_stderr,exam_id__2012-06: 0.03222923233485234
|
||||
oab_exams_acc_stderr,exam_id__2012-06a: 0.03229751885191722
|
||||
oab_exams_acc_stderr,exam_id__2012-07: 0.03239443199904663
|
||||
oab_exams_acc_stderr,exam_id__2012-08: 0.032144839789965185
|
||||
oab_exams_acc_stderr,exam_id__2012-09: 0.03103244684042299
|
||||
oab_exams_acc_stderr,exam_id__2013-10: 0.032222242709920586
|
||||
oab_exams_acc_stderr,exam_id__2013-11: 0.032249698626176736
|
||||
oab_exams_acc_stderr,exam_id__2013-12: 0.03225675063294939
|
||||
oab_exams_acc_stderr,exam_id__2014-13: 0.03217856982922958
|
||||
oab_exams_acc_stderr,exam_id__2014-14: 0.032246622088818386
|
||||
oab_exams_acc_stderr,exam_id__2014-15: 0.032435167155658584
|
||||
oab_exams_acc_stderr,exam_id__2015-16: 0.03223354880595777
|
||||
oab_exams_acc_stderr,exam_id__2015-17: 0.032573794785528166
|
||||
oab_exams_acc_stderr,exam_id__2015-18: 0.032123574402475284
|
||||
oab_exams_acc_stderr,exam_id__2016-19: 0.03271170717682627
|
||||
oab_exams_acc_stderr,exam_id__2016-20: 0.03203769414642788
|
||||
oab_exams_acc_stderr,exam_id__2016-20a: 0.031951776527517205
|
||||
oab_exams_acc_stderr,exam_id__2016-21: 0.03217984644292296
|
||||
oab_exams_acc_stderr,exam_id__2017-22: 0.03205629372165545
|
||||
oab_exams_acc_stderr,exam_id__2017-23: 0.03221345216992268
|
||||
oab_exams_acc_stderr,exam_id__2017-24: 0.03232207361521986
|
||||
oab_exams_acc_stderr,exam_id__2018-25: 0.03198727711742204
|
||||
oab_exams_alias: oab_exams
|
||||
portuguese_hate_speech_acc,all: 0.7297297297297297
|
||||
portuguese_hate_speech_acc_stderr,all: 0.010749375621571917
|
||||
portuguese_hate_speech_alias: portuguese_hate_speech_binary
|
||||
portuguese_hate_speech_f1_macro,all: 0.679463244638342
|
||||
portuguese_hate_speech_f1_macro_stderr,all: 0.01220967447481398
|
||||
tweetsentbr_acc,all: 0.7014925373134329
|
||||
tweetsentbr_acc_stderr,all: 0.007246042251471291
|
||||
tweetsentbr_alias: tweetsentbr
|
||||
tweetsentbr_f1_macro,all: 0.6540958473356445
|
||||
tweetsentbr_f1_macro_stderr,all: 0.007812938746547184
|
||||
step: 100000
|
||||
42
evals_all_steps.csv
Normal file
42
evals_all_steps.csv
Normal file
File diff suppressed because one or more lines are too long
27
evals_for_comparison.csv
Normal file
27
evals_for_comparison.csv
Normal file
File diff suppressed because one or more lines are too long
14
generation_config.json
Normal file
14
generation_config.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"bos_token_id": 1,
|
||||
"eos_token_id": 2,
|
||||
"pad_token_id": 49109,
|
||||
"transformers_version": "4.53.2",
|
||||
"do_sample": true,
|
||||
"max_new_tokens": 1024,
|
||||
"renormalize_logits": true,
|
||||
"repetition_penalty": 1.2,
|
||||
"temperature": 0.1,
|
||||
"top_k": 50,
|
||||
"top_p": 1.0,
|
||||
"use_cache": false
|
||||
}
|
||||
3
logo.png
Normal file
3
logo.png
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1856d91c3b35390cee5122902d94044657c67df7034ca4005316275c404fc8a0
|
||||
size 197189
|
||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5bc9634572194040fe75c7ff1d23e941a22564fadbb000e7f664e3b6cfc8141c
|
||||
size 3020182248
|
||||
82
ruler.yaml
Normal file
82
ruler.yaml
Normal file
@@ -0,0 +1,82 @@
|
||||
model_name: Tucano2-qwen-1.5B-Base
|
||||
results:
|
||||
niah_pt_multikey_1_1024: 0.516
|
||||
niah_pt_multikey_1_1024_stderr: 0.022371610982580396
|
||||
niah_pt_multikey_1_2048: 0.444
|
||||
niah_pt_multikey_1_2048_stderr: 0.022242244375731048
|
||||
niah_pt_multikey_1_4096: 0.296
|
||||
niah_pt_multikey_1_4096_stderr: N/A
|
||||
niah_pt_multikey_1_alias: " - niah_pt_multikey_1"
|
||||
niah_pt_multikey_2_1024: 0.064
|
||||
niah_pt_multikey_2_1024_stderr: 0.01095664621097098
|
||||
niah_pt_multikey_2_2048: 0.012
|
||||
niah_pt_multikey_2_2048_stderr: 0.0048743737072649805
|
||||
niah_pt_multikey_2_4096: 0.024
|
||||
niah_pt_multikey_2_4096_stderr: N/A
|
||||
niah_pt_multikey_2_alias: " - niah_pt_multikey_2"
|
||||
niah_pt_multikey_3_1024: 0.57
|
||||
niah_pt_multikey_3_1024_stderr: 0.022162634426652866
|
||||
niah_pt_multikey_3_2048: 0.382
|
||||
niah_pt_multikey_3_2048_stderr: 0.02175082059125093
|
||||
niah_pt_multikey_3_4096: 0.254
|
||||
niah_pt_multikey_3_4096_stderr: N/A
|
||||
niah_pt_multikey_3_alias: " - niah_pt_multikey_3"
|
||||
niah_pt_multiquery_1024: 0.162
|
||||
niah_pt_multiquery_1024_stderr: 0.011325570055151673
|
||||
niah_pt_multiquery_2048: 0.0385
|
||||
niah_pt_multiquery_2048_stderr: 0.0059461090155537594
|
||||
niah_pt_multiquery_4096: 0.0295
|
||||
niah_pt_multiquery_4096_stderr: N/A
|
||||
niah_pt_multiquery_alias: " - niah_pt_multiquery"
|
||||
niah_pt_multivalue_1024: 0.0985
|
||||
niah_pt_multivalue_1024_stderr: 0.008944579980191584
|
||||
niah_pt_multivalue_2048: 0.025
|
||||
niah_pt_multivalue_2048_stderr: 0.004721714643893329
|
||||
niah_pt_multivalue_4096: 0.0335
|
||||
niah_pt_multivalue_4096_stderr: N/A
|
||||
niah_pt_multivalue_alias: " - niah_pt_multivalue"
|
||||
niah_pt_single_1_1024: 0.066
|
||||
niah_pt_single_1_1024_stderr: 0.011114633153652916
|
||||
niah_pt_single_1_2048: 0.058
|
||||
niah_pt_single_1_2048_stderr: 0.010463793860924815
|
||||
niah_pt_single_1_4096: 0.058
|
||||
niah_pt_single_1_4096_stderr: N/A
|
||||
niah_pt_single_1_alias: " - niah_pt_single_1"
|
||||
niah_pt_single_2_1024: 0.602
|
||||
niah_pt_single_2_1024_stderr: 0.021912377885779953
|
||||
niah_pt_single_2_2048: 0.466
|
||||
niah_pt_single_2_2048_stderr: 0.022331264423258324
|
||||
niah_pt_single_2_4096: 0.452
|
||||
niah_pt_single_2_4096_stderr: N/A
|
||||
niah_pt_single_2_alias: " - niah_pt_single_2"
|
||||
niah_pt_single_3_1024: 0.496
|
||||
niah_pt_single_3_1024_stderr: 0.022382357781962105
|
||||
niah_pt_single_3_2048: 0.472
|
||||
niah_pt_single_3_2048_stderr: 0.022347949832668024
|
||||
niah_pt_single_3_4096: 0.43
|
||||
niah_pt_single_3_4096_stderr: N/A
|
||||
niah_pt_single_3_alias: " - niah_pt_single_3"
|
||||
ruler_pt_4096: 0.28358181818181816
|
||||
ruler_pt_4096_stderr: N/A
|
||||
ruler_pt_alias: ruler_pt
|
||||
ruler_pt_cwe_1024: 0.3368
|
||||
ruler_pt_cwe_1024_stderr: 0.010680808928106402
|
||||
ruler_pt_cwe_2048: 0.18159999999999998
|
||||
ruler_pt_cwe_2048_stderr: 0.009549096842162718
|
||||
ruler_pt_cwe_4096: 0.186
|
||||
ruler_pt_cwe_4096_stderr: N/A
|
||||
ruler_pt_cwe_alias: " - ruler_pt_cwe"
|
||||
ruler_pt_fwe_1024: 0.6753333333333332
|
||||
ruler_pt_fwe_1024_stderr: 0.01312227747420385
|
||||
ruler_pt_fwe_2048: 0.5393333333333332
|
||||
ruler_pt_fwe_2048_stderr: 0.012449640261503029
|
||||
ruler_pt_fwe_4096: 0.494
|
||||
ruler_pt_fwe_4096_stderr: N/A
|
||||
ruler_pt_fwe_alias: " - ruler_pt_fwe"
|
||||
ruler_pt_vt_1024: 0.9276
|
||||
ruler_pt_vt_1024_stderr: 0.01029798898592869
|
||||
ruler_pt_vt_2048: 0.9256
|
||||
ruler_pt_vt_2048_stderr: 0.009345799480829435
|
||||
ruler_pt_vt_4096: 0.8623999999999999
|
||||
ruler_pt_vt_4096_stderr: N/A
|
||||
ruler_pt_vt_alias: " - ruler_pt_vt"
|
||||
30
special_tokens_map.json
Normal file
30
special_tokens_map.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"unk_token": {
|
||||
"content": "<|unk|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
463711
tokenizer.json
Normal file
463711
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
397
tokenizer_config.json
Normal file
397
tokenizer_config.json
Normal file
@@ -0,0 +1,397 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_eos_token": false,
|
||||
"add_prefix_space": null,
|
||||
"added_tokens_decoder": {
|
||||
"0": {
|
||||
"content": "<|unk|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"1": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"2": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"49109": {
|
||||
"content": "<|pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"49110": {
|
||||
"content": "<tools>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49111": {
|
||||
"content": "</tools>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49112": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49113": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49114": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49115": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49116": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49117": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49118": {
|
||||
"content": "<answer>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49119": {
|
||||
"content": "</answer>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49120": {
|
||||
"content": "<context>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49121": {
|
||||
"content": "</context>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49122": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49123": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49124": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49125": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49126": {
|
||||
"content": "<|image|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49127": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49128": {
|
||||
"content": "<|image_placeholder|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49129": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49130": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49131": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49132": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49133": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49134": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49135": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49136": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49137": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49138": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49139": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49140": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49141": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49142": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49143": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49144": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49145": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49146": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49147": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49148": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"49149": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49150": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"49151": {
|
||||
"content": " ",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"bos_token": "<|im_start|>",
|
||||
"bos_token_id": 1,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"eos_token_id": 2,
|
||||
"extra_special_tokens": {},
|
||||
"legacy": false,
|
||||
"model_input_names": [
|
||||
"input_ids",
|
||||
"attention_mask"
|
||||
],
|
||||
"model_max_length": 4096,
|
||||
"pad_token": "<|pad|>",
|
||||
"pad_token_id": 49109,
|
||||
"padding_side": "right",
|
||||
"sp_model_kwargs": {},
|
||||
"spaces_between_special_tokens": false,
|
||||
"tokenizer_class": "PreTrainedTokenizerFast",
|
||||
"truncation_side": "right",
|
||||
"unk_token": "<|unk|>",
|
||||
"unk_token_id": 0,
|
||||
"use_default_system_prompt": false
|
||||
}
|
||||
3
train_logs.parquet
Normal file
3
train_logs.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d3c0968b7c7482ec0a6c83b649911e063ceef11d31b681cfdffe0a3ccdbb551d
|
||||
size 2492246
|
||||
97
training_config.yaml
Normal file
97
training_config.yaml
Normal file
@@ -0,0 +1,97 @@
|
||||
# Directory settings
|
||||
checkpoint_dir: "/lustre/scratch/data/polyglot_datasets/portuguese/checkpoints/models/Tucano2-qwen-1.5B-Base"
|
||||
train_dataset_dir:
|
||||
# Total: ~100B
|
||||
# Web Text (~70B, 70%)
|
||||
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/3_10b # 12B (PT)
|
||||
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/4 # 28B (PT)
|
||||
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/4 # 28B (PT)
|
||||
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/5 # 0.1B (PT)
|
||||
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2/5 # 0.1B (PT)
|
||||
# Synthetic Text (~30B, 30%)
|
||||
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2_synth # 10B (PT)
|
||||
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2_synth # 10B (PT)
|
||||
- /lustre/scratch/data/polyglot_datasets/portuguese/tokenized/gigaverbo_v2_synth # 10B (PT)
|
||||
val_dataset_dir: "/lustre/scratch/data/polyglot_datasets/portuguese/tokenized/validation"
|
||||
dataset_type: "parquet"
|
||||
cache_dir: "/lustre/mlnvme/data/nanotronics/.cache"
|
||||
|
||||
# Data loading settings
|
||||
pin_memory: true
|
||||
num_workers_for_dataloader: 16
|
||||
shuffle_dataset: true
|
||||
mask_eos_token: false
|
||||
mask_pad_token: false
|
||||
|
||||
# Model architecture settings
|
||||
vocab_size: 49152
|
||||
num_hidden_layers: 28
|
||||
num_attention_heads: 16
|
||||
num_key_value_heads: 8
|
||||
head_dim: 128
|
||||
hidden_size: 2048
|
||||
intermediate_size: 6144
|
||||
max_position_embeddings: 4096
|
||||
tie_word_embeddings: true
|
||||
hidden_act: "silu"
|
||||
output_hidden_states: false
|
||||
attn_implementation: "flash_attention_2"
|
||||
use_cache: false
|
||||
no_rope_layer_interval: null
|
||||
rope_theta: 1000000.0
|
||||
rope_scale_factor: null
|
||||
rms_norm_eps: 0.000001
|
||||
|
||||
# Training settings
|
||||
total_batch_size: 1048576
|
||||
micro_batch_size: 4
|
||||
eval_micro_batch_size: 4
|
||||
num_train_epochs: 1
|
||||
warmup_steps: 200
|
||||
max_learning_rate: 0.0001
|
||||
min_learning_rate: 0.0
|
||||
muon_learning_rate: 0.001
|
||||
weight_decay: 0.1
|
||||
beta1: 0.9
|
||||
beta2: 0.95
|
||||
eps: 0.00000001
|
||||
lr_decay_type: "cosine"
|
||||
use_sqrt: true
|
||||
lr_decay_iters_coef: 1.
|
||||
seed: 42
|
||||
max_steps: 100000
|
||||
max_grad_norm: 1.0
|
||||
|
||||
# Precision and optimization settings
|
||||
torch_compile: false
|
||||
mat_mul_precision: "highest"
|
||||
tf32: true
|
||||
bf16: true
|
||||
gradient_checkpointing: false
|
||||
use_liger_kernel: true
|
||||
static_graph: false
|
||||
|
||||
# Hub settings
|
||||
push_to_hub: false
|
||||
hub_token: null
|
||||
hub_model_id: null
|
||||
|
||||
# Tokenizer and Reference model
|
||||
tokenizer_name_or_path: "/lustre/scratch/data/polyglot_datasets/portuguese/checkpoints/models/Tucano2-qwen-1.5B"
|
||||
chat_template_path: null
|
||||
reference_model: "/lustre/scratch/data/polyglot_datasets/portuguese/checkpoints/models/Tucano2-qwen-1.5B"
|
||||
continual_pretraining: true
|
||||
|
||||
# Checkpoint settings
|
||||
resume_from_checkpoint: null
|
||||
checkpointing_steps: 2500
|
||||
begin_new_stage: false
|
||||
stage_name: "single_cosine"
|
||||
|
||||
# Miscellaneous settings
|
||||
sanity_check: false
|
||||
sanity_check_num_samples: 100000
|
||||
wandb_token: null
|
||||
wandb_id: "tucano2-qwen-1.5b"
|
||||
wandb_project: "Polyglot"
|
||||
wandb_desc: "Developing LLMs for low-resource languages"
|
||||
3
val_logs.parquet
Normal file
3
val_logs.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:376bd8998a1af2ef32f19df0842ae522de0066c2e9d4f4f0b16b0b3fb46c7afb
|
||||
size 2240
|
||||
Reference in New Issue
Block a user