初始化项目,由 ModelHub XC 社区提供模型
Model: kmseong/Llama3.2-3B-gsm8k-full-FT Source: Original Platform
This commit is contained in:
28
model_metadata.json
Normal file
28
model_metadata.json
Normal file
@@ -0,0 +1,28 @@
{
  "model_id": "kmseong/Llama3.2-3B-gsm8k-fullft-like-sn",
  "base_model": "meta-llama/Llama-3.2-3B-Instruct",
  "training_method": "Full Parameter Fine-tuning",
  "dataset": "openai/gsm8k",
  "upload_date": "2026-02-23T19:57:02.581240",
  "total_parameters": "3B",
  "trainable_parameters": "3B (100%)",
  "training_samples": 7473,
  "eval_samples": 0,
  "epochs": 3,
  "batch_size": 2,
  "gradient_accumulation_steps": 4,
  "effective_batch_size": 8,
  "learning_rate": 2e-05,
  "optimizer": "adamw_8bit",
  "scheduler": "cosine",
  "warmup_ratio": 0.0,
  "max_length": 512,
  "dtype": "bfloat16",
  "test_accuracy": 40.0,
  "test_samples": 50,
  "correct_predictions": 20,
  "framework": "transformers + trl",
  "trainer": "SFTTrainer",
  "task": "math-reasoning",
  "gradient_checkpointing": true
}
Reference in New Issue
Block a user