{
  "model_id": "kmseong/Llama3.2-3B-gsm8k-fullft-like-sn",
  "base_model": "meta-llama/Llama-3.2-3B-Instruct",
  "training_method": "Full Parameter Fine-tuning",
  "dataset": "openai/gsm8k",
  "upload_date": "2026-02-23T19:57:02.581240",
  "total_parameters": "3B",
  "trainable_parameters": "3B (100%)",
  "training_samples": 7473,
  "eval_samples": 0,
  "epochs": 3,
  "batch_size": 2,
  "gradient_accumulation_steps": 4,
  "effective_batch_size": 8,
  "learning_rate": 2e-05,
  "optimizer": "adamw_8bit",
  "scheduler": "cosine",
  "warmup_ratio": 0.0,
  "max_length": 512,
  "dtype": "bfloat16",
  "test_accuracy": 40.0,
  "test_samples": 50,
  "correct_predictions": 20,
  "framework": "transformers + trl",
  "trainer": "SFTTrainer",
  "task": "math-reasoning",
  "gradient_checkpointing": true
}