初始化项目，由ModelHub XC社区提供模型

Model: kmseong/Llama3.2-3B-gsm8k-full-FT Source: Original Platform
2026-04-13 08:05:05 +08:00
commit 1847568cd2
12 changed files with 2743 additions and 0 deletions
--- a/finetune_config.json
+++ b/finetune_config.json
@@ -0,0 +1,19 @@
+{
+  "base_model": "meta-llama/Llama-3.2-3B",
+  "fine_tuning_type": "Full Parameter Fine-tuning",
+  "dataset": "GSM8K",
+  "num_train_samples": 7473,
+  "batch_size": 2,
+  "grad_accum": 4,
+  "learning_rate": 1e-05,
+  "weight_decay": 0.01,
+  "warmup_ratio": 0.1,
+  "epochs": 3,
+  "max_length": 512,
+  "max_grad_norm": 1.0,
+  "lr_scheduler_type": "cosine",
+  "optimizer": "adamw_bnb_8bit",
+  "gradient_checkpointing": true,
+  "dtype": "bf16",
+  "trainer_type": "Trainer"
+}