初始化项目,由ModelHub XC社区提供模型
Model: pharaouk/untitled-7B Source: Original Platform
This commit is contained in:
343
trainer_state.json
Normal file
343
trainer_state.json
Normal file
@@ -0,0 +1,343 @@
|
||||
{
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 0.006615506747816882,
|
||||
"eval_steps": 756,
|
||||
"global_step": 50,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 0.0,
|
||||
"loss": 0.929,
|
||||
"step": 1
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"eval_loss": 0.9224275946617126,
|
||||
"eval_runtime": 2.17,
|
||||
"eval_samples_per_second": 79.724,
|
||||
"eval_steps_per_second": 3.687,
|
||||
"step": 1
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"eval_bench_accuracy_agieval": 0.288135593220339,
|
||||
"eval_bench_accuracy_arc_challenge": 0.8148148148148148,
|
||||
"eval_bench_accuracy_arc_easy": 0.9074074074074074,
|
||||
"eval_bench_accuracy_bigbench": 0.3442622950819672,
|
||||
"eval_bench_accuracy_boolq": 0.5185185185185185,
|
||||
"eval_bench_accuracy_mmlu": 0.48148148148148145,
|
||||
"eval_bench_accuracy_openbookqa": 0.14814814814814814,
|
||||
"eval_bench_accuracy_truthful_qa": 0.37735849056603776,
|
||||
"eval_bench_accuracy_winogrande": 0.4074074074074074,
|
||||
"eval_bench_average_accuracy": 0.4763926840717912,
|
||||
"eval_bench_loss": 5.786159653261484,
|
||||
"eval_bench_total_accuracy": 0.47283702213279677,
|
||||
"step": 1
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 6.000000000000001e-07,
|
||||
"loss": 0.8533,
|
||||
"step": 2
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 1.2000000000000002e-06,
|
||||
"loss": 0.9641,
|
||||
"step": 3
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 1.8e-06,
|
||||
"loss": 0.8488,
|
||||
"step": 4
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 2.4000000000000003e-06,
|
||||
"loss": 0.8863,
|
||||
"step": 5
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 3e-06,
|
||||
"loss": 0.7988,
|
||||
"step": 6
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 3.6e-06,
|
||||
"loss": 0.7789,
|
||||
"step": 7
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 4.2e-06,
|
||||
"loss": 0.7144,
|
||||
"step": 8
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 4.800000000000001e-06,
|
||||
"loss": 0.8322,
|
||||
"step": 9
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.4e-06,
|
||||
"loss": 0.734,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 6e-06,
|
||||
"loss": 0.7861,
|
||||
"step": 11
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.999602806831722e-06,
|
||||
"loss": 0.6733,
|
||||
"step": 12
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.999205613663445e-06,
|
||||
"loss": 0.7019,
|
||||
"step": 13
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.9988084204951675e-06,
|
||||
"loss": 0.7096,
|
||||
"step": 14
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.99841122732689e-06,
|
||||
"loss": 0.6745,
|
||||
"step": 15
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.998014034158613e-06,
|
||||
"loss": 0.8022,
|
||||
"step": 16
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.997616840990336e-06,
|
||||
"loss": 0.7753,
|
||||
"step": 17
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.997219647822058e-06,
|
||||
"loss": 0.6939,
|
||||
"step": 18
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.99682245465378e-06,
|
||||
"loss": 0.689,
|
||||
"step": 19
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.996425261485502e-06,
|
||||
"loss": 0.7419,
|
||||
"step": 20
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.996028068317225e-06,
|
||||
"loss": 0.6975,
|
||||
"step": 21
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.9956308751489475e-06,
|
||||
"loss": 0.686,
|
||||
"step": 22
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.99523368198067e-06,
|
||||
"loss": 0.7576,
|
||||
"step": 23
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.994836488812393e-06,
|
||||
"loss": 0.6802,
|
||||
"step": 24
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.994439295644115e-06,
|
||||
"loss": 0.711,
|
||||
"step": 25
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.994042102475838e-06,
|
||||
"loss": 0.6658,
|
||||
"step": 26
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.99364490930756e-06,
|
||||
"loss": 0.685,
|
||||
"step": 27
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.993247716139283e-06,
|
||||
"loss": 0.6881,
|
||||
"step": 28
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.992850522971005e-06,
|
||||
"loss": 0.7066,
|
||||
"step": 29
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.992453329802727e-06,
|
||||
"loss": 0.6993,
|
||||
"step": 30
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.99205613663445e-06,
|
||||
"loss": 0.6429,
|
||||
"step": 31
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.991658943466173e-06,
|
||||
"loss": 0.7205,
|
||||
"step": 32
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.991261750297895e-06,
|
||||
"loss": 0.703,
|
||||
"step": 33
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.990864557129617e-06,
|
||||
"loss": 0.6512,
|
||||
"step": 34
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.99046736396134e-06,
|
||||
"loss": 0.6583,
|
||||
"step": 35
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.990070170793063e-06,
|
||||
"loss": 0.6904,
|
||||
"step": 36
|
||||
},
|
||||
{
|
||||
"epoch": 0.0,
|
||||
"learning_rate": 5.989672977624785e-06,
|
||||
"loss": 0.6619,
|
||||
"step": 37
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.989275784456507e-06,
|
||||
"loss": 0.7033,
|
||||
"step": 38
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.98887859128823e-06,
|
||||
"loss": 0.6522,
|
||||
"step": 39
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.9884813981199526e-06,
|
||||
"loss": 0.6365,
|
||||
"step": 40
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.988084204951675e-06,
|
||||
"loss": 0.6422,
|
||||
"step": 41
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.987687011783397e-06,
|
||||
"loss": 0.625,
|
||||
"step": 42
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.98728981861512e-06,
|
||||
"loss": 0.6148,
|
||||
"step": 43
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.986892625446843e-06,
|
||||
"loss": 0.7089,
|
||||
"step": 44
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.986495432278565e-06,
|
||||
"loss": 0.681,
|
||||
"step": 45
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.986098239110287e-06,
|
||||
"loss": 0.6505,
|
||||
"step": 46
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.98570104594201e-06,
|
||||
"loss": 0.6531,
|
||||
"step": 47
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.9853038527737325e-06,
|
||||
"loss": 0.6683,
|
||||
"step": 48
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.984906659605455e-06,
|
||||
"loss": 0.6967,
|
||||
"step": 49
|
||||
},
|
||||
{
|
||||
"epoch": 0.01,
|
||||
"learning_rate": 5.984509466437178e-06,
|
||||
"loss": 0.6514,
|
||||
"step": 50
|
||||
}
|
||||
],
|
||||
"logging_steps": 1,
|
||||
"max_steps": 15116,
|
||||
"num_train_epochs": 2,
|
||||
"save_steps": 50,
|
||||
"total_flos": 2.097021406150656e+17,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
Reference in New Issue
Block a user