初始化项目,由ModelHub XC社区提供模型
Model: philipp-zettl/german-structured-output-olmo2-1b Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
61
README.md
Normal file
61
README.md
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
---
|
||||||
|
base_model: allenai/OLMo-2-0425-1B-Instruct
|
||||||
|
library_name: transformers
|
||||||
|
model_name: german-structured-output-olmo2-1b
|
||||||
|
tags:
|
||||||
|
- generated_from_trainer
|
||||||
|
- trackio
|
||||||
|
- hf_jobs
|
||||||
|
- trl
|
||||||
|
- trackio:https://huggingface.co/spaces/philipp-zettl/huggingface-static-6a4806
|
||||||
|
- sft
|
||||||
|
licence: license
|
||||||
|
---
|
||||||
|
|
||||||
|
# Model Card for german-structured-output-olmo2-1b
|
||||||
|
|
||||||
|
This model is a fine-tuned version of [allenai/OLMo-2-0425-1B-Instruct](https://huggingface.co/allenai/OLMo-2-0425-1B-Instruct).
|
||||||
|
It has been trained using [TRL](https://github.com/huggingface/trl).
|
||||||
|
|
||||||
|
## Quick start
|
||||||
|
|
||||||
|
```python
|
||||||
|
from transformers import pipeline
|
||||||
|
|
||||||
|
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
||||||
|
generator = pipeline("text-generation", model="philipp-zettl/german-structured-output-olmo2-1b", device="cuda")
|
||||||
|
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
||||||
|
print(output["generated_text"])
|
||||||
|
```
|
||||||
|
|
||||||
|
## Training procedure
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
This model was trained with SFT.
|
||||||
|
|
||||||
|
### Framework versions
|
||||||
|
|
||||||
|
- TRL: 1.2.0
|
||||||
|
- Transformers: 5.6.1
|
||||||
|
- Pytorch: 2.11.0
|
||||||
|
- Datasets: 4.8.4
|
||||||
|
- Tokenizers: 0.22.2
|
||||||
|
|
||||||
|
## Citations
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Cite TRL as:
|
||||||
|
|
||||||
|
```bibtex
|
||||||
|
@software{vonwerra2020trl,
|
||||||
|
title = {{TRL: Transformers Reinforcement Learning}},
|
||||||
|
author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
|
||||||
|
license = {Apache-2.0},
|
||||||
|
url = {https://github.com/huggingface/trl},
|
||||||
|
year = {2020}
|
||||||
|
}
|
||||||
|
```
|
||||||
9
chat_template.jinja
Normal file
9
chat_template.jinja
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{{ bos_token }}{% for message in messages %}{% if message['role'] == 'system' %}{{ '<|system|>
|
||||||
|
' + message['content'] + '
|
||||||
|
' }}{% elif message['role'] == 'user' %}{{ '<|user|>
|
||||||
|
' + message['content'] + '
|
||||||
|
' }}{% elif message['role'] == 'assistant' %}{% if not loop.last %}{{ '<|assistant|>
|
||||||
|
' + message['content'] + eos_token + '
|
||||||
|
' }}{% else %}{{ '<|assistant|>
|
||||||
|
' + message['content'] + eos_token }}{% endif %}{% endif %}{% if loop.last and add_generation_prompt %}{{ '<|assistant|>
|
||||||
|
' }}{% endif %}{% endfor %}
|
||||||
29
config.json
Normal file
29
config.json
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"Olmo2ForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 100257,
|
||||||
|
"dtype": "bfloat16",
|
||||||
|
"eos_token_id": 100257,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 2048,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 8192,
|
||||||
|
"max_position_embeddings": 4096,
|
||||||
|
"model_type": "olmo2",
|
||||||
|
"num_attention_heads": 16,
|
||||||
|
"num_hidden_layers": 16,
|
||||||
|
"num_key_value_heads": 16,
|
||||||
|
"pad_token_id": 100277,
|
||||||
|
"rms_norm_eps": 1e-06,
|
||||||
|
"rope_parameters": {
|
||||||
|
"rope_theta": 500000,
|
||||||
|
"rope_type": "default"
|
||||||
|
},
|
||||||
|
"tie_word_embeddings": false,
|
||||||
|
"transformers_version": "5.6.1",
|
||||||
|
"use_cache": false,
|
||||||
|
"vocab_size": 100352
|
||||||
|
}
|
||||||
9
generation_config.json
Normal file
9
generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 100257,
|
||||||
|
"eos_token_id": [
|
||||||
|
100257
|
||||||
|
],
|
||||||
|
"pad_token_id": 100277,
|
||||||
|
"transformers_version": "5.6.1"
|
||||||
|
}
|
||||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:60e9d083fec2eae48451013015c3c4f0acff6c19acc0e37c03dcd769df25e2ff
|
||||||
|
size 2969854224
|
||||||
500536
tokenizer.json
Normal file
500536
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
14
tokenizer_config.json
Normal file
14
tokenizer_config.json
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
{
|
||||||
|
"add_prefix_space": false,
|
||||||
|
"backend": "tokenizers",
|
||||||
|
"bos_token": "<|endoftext|>",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "<|endoftext|>",
|
||||||
|
"errors": "replace",
|
||||||
|
"is_local": false,
|
||||||
|
"local_files_only": false,
|
||||||
|
"model_max_length": 1000000000000000019884624838656,
|
||||||
|
"pad_token": "<|pad|>",
|
||||||
|
"tokenizer_class": "GPT2Tokenizer",
|
||||||
|
"unk_token": "<|endoftext|>"
|
||||||
|
}
|
||||||
257
train.py
Normal file
257
train.py
Normal file
@@ -0,0 +1,257 @@
|
|||||||
|
"""
|
||||||
|
German Structured Output SFT Training
|
||||||
|
======================================
|
||||||
|
EU AI Act Article 53 compliant fine-tuning of OLMo 2 1B Instruct
|
||||||
|
on philipp-zettl/german-structured-output dataset.
|
||||||
|
|
||||||
|
Base model: allenai/OLMo-2-0425-1B-Instruct (Apache 2.0)
|
||||||
|
- Pretraining data: allenai/olmo-mix-1124 (ODC-BY, fully documented)
|
||||||
|
- Every data source has explicit license: DCLM (CC-BY-4.0), ArXiv (ODC-BY),
|
||||||
|
PeS2o (ODC-BY), StarCoder (ODC-BY), Wikipedia (ODC-BY), etc.
|
||||||
|
- Full training logs, code, and intermediate checkpoints published by AI2
|
||||||
|
|
||||||
|
Dataset: philipp-zettl/german-structured-output (CC BY-SA 4.0)
|
||||||
|
- All sources documented with licenses per example
|
||||||
|
- Zero PII, GDPR compliant
|
||||||
|
|
||||||
|
Method: Full SFT with gradient checkpointing
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import torch
|
||||||
|
import statistics
|
||||||
|
|
||||||
|
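# Request unbuffered output for any child Python processes. This variable is only read at interpreter startup, so this process's own logs stream in real time because every print() passes flush=True.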
|
||||||
|
os.environ["PYTHONUNBUFFERED"] = "1"
|
||||||
|
|
||||||
|
print("=" * 60, flush=True)
|
||||||
|
print("Starting German Structured Output SFT Training", flush=True)
|
||||||
|
print("=" * 60, flush=True)
|
||||||
|
|
||||||
|
# Verify HF_TOKEN is available (needed for private dataset + push_to_hub)
|
||||||
|
token = os.environ.get("HF_TOKEN")
|
||||||
|
if not token:
|
||||||
|
print("ERROR: HF_TOKEN not set! Cannot access private dataset or push model.", flush=True)
|
||||||
|
print("Set via: export HF_TOKEN=hf_... or pass --env HF_TOKEN=hf_...", flush=True)
|
||||||
|
sys.exit(1)
|
||||||
|
else:
|
||||||
|
# Confirm the token is present without echoing any part of the secret into job logs.
print("HF_TOKEN: present", flush=True)
|
||||||
|
|
||||||
|
# Check GPU
|
||||||
|
print(f"CUDA available: {torch.cuda.is_available()}", flush=True)
|
||||||
|
if torch.cuda.is_available():
|
||||||
|
print(f"GPU: {torch.cuda.get_device_name()}", flush=True)
|
||||||
|
# Device properties expose the memory size as `total_memory` (bytes); `total_mem` does not exist.
print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB", flush=True)
|
||||||
|
else:
|
||||||
|
print("WARNING: No GPU detected! Training will be very slow.", flush=True)
|
||||||
|
|
||||||
|
from datasets import load_dataset
|
||||||
|
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||||
|
from trl import SFTConfig, SFTTrainer
|
||||||
|
|
||||||
|
print("All imports OK", flush=True)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Configuration
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
MODEL_ID = "allenai/OLMo-2-0425-1B-Instruct"
|
||||||
|
DATASET_ID = "philipp-zettl/german-structured-output"
|
||||||
|
OUTPUT_MODEL_ID = "philipp-zettl/german-structured-output-olmo2-1b"
|
||||||
|
OUTPUT_DIR = "./output"
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Trackio monitoring — writes to HF Bucket for persistent storage
|
||||||
|
# ============================================================================
|
||||||
|
print("Initializing Trackio...", flush=True)
|
||||||
|
import trackio
|
||||||
|
trackio.init(
|
||||||
|
project="german-structured-output-sft",
|
||||||
|
name="olmo2-1b-full-sft",
|
||||||
|
space_id="philipp-zettl/german-structured-output-training",
|
||||||
|
bucket_id="philipp-zettl/german-structured-output-training-bucket",
|
||||||
|
)
|
||||||
|
print("Trackio initialized with bucket storage", flush=True)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Load dataset
|
||||||
|
# ============================================================================
|
||||||
|
print("Loading dataset...", flush=True)
|
||||||
|
dataset = load_dataset(DATASET_ID)
|
||||||
|
print(f"Train: {len(dataset['train'])}, Val: {len(dataset['validation'])}, Test: {len(dataset['test'])}", flush=True)
|
||||||
|
|
||||||
|
# Quick data audit
|
||||||
|
print("\nSample messages structure:", flush=True)
|
||||||
|
sample = dataset["train"][0]
|
||||||
|
for msg in sample["messages"]:
|
||||||
|
print(f" [{msg['role']}]: {msg['content'][:100]}...", flush=True)
|
||||||
|
print(f" task_type: {sample['task_type']}", flush=True)
|
||||||
|
print(f" quality_score: {sample['quality_score']}", flush=True)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Load model and tokenizer
|
||||||
|
# ============================================================================
|
||||||
|
print(f"\nLoading model: {MODEL_ID}", flush=True)
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(
|
||||||
|
MODEL_ID,
|
||||||
|
torch_dtype=torch.bfloat16,
|
||||||
|
attn_implementation="sdpa",
|
||||||
|
)
|
||||||
|
|
||||||
|
# Verify chat template exists
|
||||||
|
print(f"Chat template available: {tokenizer.chat_template is not None}", flush=True)
|
||||||
|
print(f"Model parameters: {sum(p.numel() for p in model.parameters()) / 1e6:.0f}M", flush=True)
|
||||||
|
|
||||||
|
# Check sequence lengths in dataset to set max_length appropriately
|
||||||
|
print("\nAnalyzing sequence lengths...", flush=True)
|
||||||
|
lengths = []
|
||||||
|
for i, example in enumerate(dataset["train"]):
|
||||||
|
text = tokenizer.apply_chat_template(example["messages"], tokenize=False)
|
||||||
|
tokens = tokenizer(text, return_length=True)
|
||||||
|
lengths.append(tokens["length"][0])
|
||||||
|
if i >= 200: # Sample first 200
|
||||||
|
break
|
||||||
|
|
||||||
|
print(f" Sampled {len(lengths)} examples", flush=True)
|
||||||
|
print(f" Min: {min(lengths)}, Max: {max(lengths)}, Mean: {statistics.mean(lengths):.0f}, Median: {statistics.median(lengths):.0f}", flush=True)
|
||||||
|
print(f" P95: {sorted(lengths)[int(len(lengths)*0.95)]}", flush=True)
|
||||||
|
print(f" P99: {sorted(lengths)[int(len(lengths)*0.99)]}", flush=True)
|
||||||
|
|
||||||
|
# Set max_length to cover P99 + margin, cap at 2048
|
||||||
|
MAX_LENGTH = min(sorted(lengths)[int(len(lengths)*0.99)] + 128, 2048)
|
||||||
|
print(f" Using max_length: {MAX_LENGTH}", flush=True)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Training configuration
|
||||||
|
# ============================================================================
|
||||||
|
training_args = SFTConfig(
|
||||||
|
output_dir=OUTPUT_DIR,
|
||||||
|
|
||||||
|
# Precision
|
||||||
|
bf16=True,
|
||||||
|
|
||||||
|
# Sequence
|
||||||
|
max_length=MAX_LENGTH,
|
||||||
|
packing=False, # No packing — variable length structured outputs
|
||||||
|
|
||||||
|
# Batch size: effective = 4 * 4 = 16
|
||||||
|
per_device_train_batch_size=4,
|
||||||
|
gradient_accumulation_steps=4,
|
||||||
|
|
||||||
|
# Optimizer
|
||||||
|
learning_rate=2e-5,
|
||||||
|
lr_scheduler_type="cosine",
|
||||||
|
warmup_ratio=0.05,
|
||||||
|
weight_decay=0.01,
|
||||||
|
max_grad_norm=1.0,
|
||||||
|
|
||||||
|
# Epochs — small dataset (4K), more epochs to converge
|
||||||
|
num_train_epochs=5,
|
||||||
|
|
||||||
|
# Evaluation
|
||||||
|
eval_strategy="epoch",
|
||||||
|
eval_on_start=True,
|
||||||
|
|
||||||
|
# Saving
|
||||||
|
save_strategy="epoch",
|
||||||
|
save_total_limit=2,
|
||||||
|
load_best_model_at_end=True,
|
||||||
|
metric_for_best_model="eval_loss",
|
||||||
|
greater_is_better=False,
|
||||||
|
|
||||||
|
# Logging — plain text, no tqdm
|
||||||
|
logging_strategy="steps",
|
||||||
|
logging_steps=10,
|
||||||
|
logging_first_step=True,
|
||||||
|
disable_tqdm=True,
|
||||||
|
report_to="trackio",
|
||||||
|
run_name="olmo2-1b-german-structured-output",
|
||||||
|
|
||||||
|
# Memory
|
||||||
|
gradient_checkpointing=True,
|
||||||
|
|
||||||
|
# Hub — CRITICAL: push_to_hub to save model (ephemeral job storage)
|
||||||
|
push_to_hub=True,
|
||||||
|
hub_model_id=OUTPUT_MODEL_ID,
|
||||||
|
hub_strategy="end",
|
||||||
|
|
||||||
|
# NEFTune for small dataset regularization (paper: arxiv 2310.05914)
|
||||||
|
neftune_noise_alpha=5.0,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Trainer
|
||||||
|
# ============================================================================
|
||||||
|
print("\nInitializing SFTTrainer...", flush=True)
|
||||||
|
trainer = SFTTrainer(
|
||||||
|
model=model,
|
||||||
|
processing_class=tokenizer,
|
||||||
|
args=training_args,
|
||||||
|
train_dataset=dataset["train"],
|
||||||
|
eval_dataset=dataset["validation"],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Print training summary
|
||||||
|
total_steps = len(dataset["train"]) // (training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps) * int(training_args.num_train_epochs)
|
||||||
|
print(f"\n{'='*60}", flush=True)
|
||||||
|
print(f"Training Summary", flush=True)
|
||||||
|
print(f"{'='*60}", flush=True)
|
||||||
|
print(f" Base model: {MODEL_ID}", flush=True)
|
||||||
|
print(f" Base model license: Apache 2.0", flush=True)
|
||||||
|
print(f" Base model training data: allenai/olmo-mix-1124 (ODC-BY)", flush=True)
|
||||||
|
print(f" Dataset: {DATASET_ID}", flush=True)
|
||||||
|
print(f" Dataset license: CC BY-SA 4.0", flush=True)
|
||||||
|
print(f" Output model: {OUTPUT_MODEL_ID}", flush=True)
|
||||||
|
print(f" Train examples: {len(dataset['train'])}", flush=True)
|
||||||
|
print(f" Val examples: {len(dataset['validation'])}", flush=True)
|
||||||
|
print(f" Max length: {MAX_LENGTH}", flush=True)
|
||||||
|
print(f" Effective batch size: {training_args.per_device_train_batch_size} x {training_args.gradient_accumulation_steps} = {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}", flush=True)
|
||||||
|
print(f" Learning rate: {training_args.learning_rate}", flush=True)
|
||||||
|
print(f" Epochs: {training_args.num_train_epochs}", flush=True)
|
||||||
|
print(f" Estimated steps: ~{total_steps}", flush=True)
|
||||||
|
print(f" NEFTune alpha: {training_args.neftune_noise_alpha}", flush=True)
|
||||||
|
print(f" EU AI Act: Article 53 compliant (full data provenance chain)", flush=True)
|
||||||
|
print(f"{'='*60}", flush=True)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Train!
|
||||||
|
# ============================================================================
|
||||||
|
print("\nStarting training...", flush=True)
|
||||||
|
train_result = trainer.train()
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Save and push
|
||||||
|
# ============================================================================
|
||||||
|
print("\nSaving model...", flush=True)
|
||||||
|
trainer.save_model()
|
||||||
|
|
||||||
|
# Log final metrics
|
||||||
|
metrics = train_result.metrics
|
||||||
|
print(f"\nTraining complete!", flush=True)
|
||||||
|
print(f" Train loss: {metrics.get('train_loss', 'N/A')}", flush=True)
|
||||||
|
# Format the number only when the metric exists; applying ':.0f' to the 'N/A' fallback raises ValueError.
print(f" Train runtime: {format(metrics['train_runtime'], '.0f') + 's' if 'train_runtime' in metrics else 'N/A'}", flush=True)
|
||||||
|
# Format the number only when the metric exists; applying ':.1f' to the 'N/A' fallback raises ValueError.
print(f" Train samples/sec: {format(metrics['train_samples_per_second'], '.1f') if 'train_samples_per_second' in metrics else 'N/A'}", flush=True)
|
||||||
|
|
||||||
|
# Evaluate
|
||||||
|
print("\nRunning final evaluation...", flush=True)
|
||||||
|
eval_metrics = trainer.evaluate()
|
||||||
|
print(f" Eval loss: {eval_metrics.get('eval_loss', 'N/A')}", flush=True)
|
||||||
|
|
||||||
|
# Push to hub
|
||||||
|
print(f"\nPushing to Hub: {OUTPUT_MODEL_ID}", flush=True)
|
||||||
|
trainer.push_to_hub(
|
||||||
|
commit_message="EU AI Act compliant SFT: OLMo 2 1B on german-structured-output",
|
||||||
|
tags=[
|
||||||
|
"german", "structured-output", "json", "function-calling",
|
||||||
|
"ner", "relation-extraction", "gdpr-anonymization",
|
||||||
|
"eu-ai-act-compliant", "gdpr-compliant", "sft",
|
||||||
|
"olmo2", "article-53"
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
print("\n✅ Training complete and model pushed to Hub!", flush=True)
|
||||||
|
print(f" Model: https://huggingface.co/{OUTPUT_MODEL_ID}", flush=True)
|
||||||
|
print(f" Trackio: https://huggingface.co/spaces/philipp-zettl/german-structured-output-training", flush=True)
|
||||||
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:022802dfe5deb48d1b90fab3e0b01f9e32604a91ae639f58a6adc24272409a80
|
||||||
|
size 5841
|
||||||
Reference in New Issue
Block a user