Model: Hyeongwon/P9-split2_only_answer_Qwen3-4B-Base_0402-01-5e-6 Source: Original Platform
9 lines
232 B
JSON
9 lines
232 B
JSON
{
  "epoch": 6.0,
  "total_flos": 522066352668672.0,
  "train_loss": 0.5458187263237473,
  "train_runtime": 71514.3468,
  "train_samples": 41166,
  "train_samples_per_second": 3.454,
  "train_steps_per_second": 0.007
}