Model: ali-elganzory/open-sci-ref-v0.02-1.7b-nemotron-hq-300B-16384-rope_theta-1M-long_sft_16k Source: Original Platform
8 lines
203 B
JSON
8 lines
203 B
JSON
{
|
|
"epoch": 1.0,
|
|
"total_flos": 889480391426048.0,
|
|
"train_loss": 0.9303844255645997,
|
|
"train_runtime": 18614.4314,
|
|
"train_samples_per_second": 2.79,
|
|
"train_steps_per_second": 0.087
|
|
} |