Model: jhhj25/qwen3-moe-neuron_structure_drop-p50-s1k-128samples-sft Source: Original Platform
8 lines
211 B
JSON
8 lines
211 B
JSON
{
|
|
"total_flos": 63845040947200.0,
|
|
"train_loss": 0.868978128804789,
|
|
"train_runtime": 18382.0346,
|
|
"train_samples": 1000,
|
|
"train_samples_per_second": 0.333,
|
|
"train_steps_per_second": 0.021
|
|
} |