Model: Hyeongwon/P9-split1_only_answer_Qwen3-4B-Base_0402-01-5e-6 Source: Original Platform
9 lines
232 B
JSON
9 lines
232 B
JSON
{
|
|
"epoch": 6.0,
|
|
"total_flos": 515196244262912.0,
|
|
"train_loss": 0.5535088468757088,
|
|
"train_runtime": 94446.5755,
|
|
"train_samples": 41030,
|
|
"train_samples_per_second": 2.607,
|
|
"train_steps_per_second": 0.006
|
|
} |