初始化项目,由ModelHub XC社区提供模型
Model: simonneupane/controlled-food-recipe-generation Source: Original Platform
This commit is contained in:
15
last-checkpoint/added_tokens.json
Normal file
15
last-checkpoint/added_tokens.json
Normal file
@@ -0,0 +1,15 @@
|
||||
{
|
||||
"<INGR_END>": 50264,
|
||||
"<INGR_START>": 50262,
|
||||
"<INPUT_END>": 50259,
|
||||
"<INPUT_START>": 50257,
|
||||
"<INSTR_END>": 50267,
|
||||
"<INSTR_START>": 50265,
|
||||
"<NEXT_INGR>": 50263,
|
||||
"<NEXT_INPUT>": 50258,
|
||||
"<NEXT_STEP>": 50266,
|
||||
"<RECIPE_END>": 50269,
|
||||
"<RECIPE_START>": 50268,
|
||||
"<TITLE_END>": 50261,
|
||||
"<TITLE_START>": 50260
|
||||
}
|
||||
38
last-checkpoint/config.json
Normal file
38
last-checkpoint/config.json
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"activation_function": "gelu_new",
|
||||
"architectures": [
|
||||
"GPT2LMHeadModel"
|
||||
],
|
||||
"attn_pdrop": 0.1,
|
||||
"bos_token_id": 50256,
|
||||
"embd_pdrop": 0.1,
|
||||
"eos_token_id": 50256,
|
||||
"initializer_range": 0.02,
|
||||
"layer_norm_epsilon": 1e-05,
|
||||
"model_type": "gpt2",
|
||||
"n_ctx": 1024,
|
||||
"n_embd": 768,
|
||||
"n_head": 12,
|
||||
"n_inner": null,
|
||||
"n_layer": 12,
|
||||
"n_positions": 1024,
|
||||
"reorder_and_upcast_attn": false,
|
||||
"resid_pdrop": 0.1,
|
||||
"scale_attn_by_inverse_layer_idx": false,
|
||||
"scale_attn_weights": true,
|
||||
"summary_activation": null,
|
||||
"summary_first_dropout": 0.1,
|
||||
"summary_proj_to_labels": true,
|
||||
"summary_type": "cls_index",
|
||||
"summary_use_proj": true,
|
||||
"task_specific_params": {
|
||||
"text-generation": {
|
||||
"do_sample": true,
|
||||
"max_length": 50
|
||||
}
|
||||
},
|
||||
"torch_dtype": "float32",
|
||||
"transformers_version": "4.52.4",
|
||||
"use_cache": true,
|
||||
"vocab_size": 50270
|
||||
}
|
||||
6
last-checkpoint/generation_config.json
Normal file
6
last-checkpoint/generation_config.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 50256,
|
||||
"eos_token_id": 50256,
|
||||
"transformers_version": "4.52.4"
|
||||
}
|
||||
50001
last-checkpoint/merges.txt
Normal file
50001
last-checkpoint/merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
last-checkpoint/model.safetensors
Normal file
3
last-checkpoint/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:31374e11cab95da4335286559400d4ef35bb288afc215f0d0021f27efc3e85cc
|
||||
size 497814144
|
||||
3
last-checkpoint/optimizer.pt
Normal file
3
last-checkpoint/optimizer.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2683f4879e9b48ffc355a648377c9029f351277f0a969be4564f844da3da4d8a
|
||||
size 995722170
|
||||
3
last-checkpoint/rng_state.pth
Normal file
3
last-checkpoint/rng_state.pth
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:995b07f3cfc3baacff0e258cdaec63824fdb43fb33ccabe8297d597e04955aea
|
||||
size 14244
|
||||
3
last-checkpoint/scaler.pt
Normal file
3
last-checkpoint/scaler.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b30172cf14f5dbe00280d63e36224a9f28dc7a0e8b38a74ceb5eb284e84da363
|
||||
size 988
|
||||
3
last-checkpoint/scheduler.pt
Normal file
3
last-checkpoint/scheduler.pt
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:7c910fc6b91130eeca68181cc3c87a2e24b14e94547f06fd96621073d501c834
|
||||
size 1064
|
||||
24
last-checkpoint/special_tokens_map.json
Normal file
24
last-checkpoint/special_tokens_map.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": "<|endoftext|>",
|
||||
"unk_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
127
last-checkpoint/tokenizer_config.json
Normal file
127
last-checkpoint/tokenizer_config.json
Normal file
@@ -0,0 +1,127 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"50256": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"50257": {
|
||||
"content": "<INPUT_START>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50258": {
|
||||
"content": "<NEXT_INPUT>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50259": {
|
||||
"content": "<INPUT_END>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50260": {
|
||||
"content": "<TITLE_START>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50261": {
|
||||
"content": "<TITLE_END>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50262": {
|
||||
"content": "<INGR_START>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50263": {
|
||||
"content": "<NEXT_INGR>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50264": {
|
||||
"content": "<INGR_END>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50265": {
|
||||
"content": "<INSTR_START>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50266": {
|
||||
"content": "<NEXT_STEP>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50267": {
|
||||
"content": "<INSTR_END>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50268": {
|
||||
"content": "<RECIPE_START>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"50269": {
|
||||
"content": "<RECIPE_END>",
|
||||
"lstrip": false,
|
||||
"normalized": true,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"bos_token": "<|endoftext|>",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|endoftext|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 1024,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"tokenizer_class": "GPT2Tokenizer",
|
||||
"unk_token": "<|endoftext|>"
|
||||
}
|
||||
804
last-checkpoint/trainer_state.json
Normal file
804
last-checkpoint/trainer_state.json
Normal file
@@ -0,0 +1,804 @@
|
||||
{
|
||||
"best_global_step": null,
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 0.8764800560947236,
|
||||
"eval_steps": 500,
|
||||
"global_step": 55000,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"epoch": 0.007968000509952033,
|
||||
"grad_norm": 118280.5625,
|
||||
"learning_rate": 2.9936381561814055e-05,
|
||||
"loss": 1.2421,
|
||||
"step": 500
|
||||
},
|
||||
{
|
||||
"epoch": 0.015936001019904066,
|
||||
"grad_norm": 93972.8125,
|
||||
"learning_rate": 2.9856659208197584e-05,
|
||||
"loss": 0.7745,
|
||||
"step": 1000
|
||||
},
|
||||
{
|
||||
"epoch": 0.023904001529856098,
|
||||
"grad_norm": 78047.78125,
|
||||
"learning_rate": 2.9776936854581113e-05,
|
||||
"loss": 0.73,
|
||||
"step": 1500
|
||||
},
|
||||
{
|
||||
"epoch": 0.03187200203980813,
|
||||
"grad_norm": 72831.09375,
|
||||
"learning_rate": 2.9697214500964642e-05,
|
||||
"loss": 0.7087,
|
||||
"step": 2000
|
||||
},
|
||||
{
|
||||
"epoch": 0.03984000254976016,
|
||||
"grad_norm": 62310.4453125,
|
||||
"learning_rate": 2.961749214734817e-05,
|
||||
"loss": 0.6964,
|
||||
"step": 2500
|
||||
},
|
||||
{
|
||||
"epoch": 0.047808003059712195,
|
||||
"grad_norm": 65517.09375,
|
||||
"learning_rate": 2.9537769793731697e-05,
|
||||
"loss": 0.6858,
|
||||
"step": 3000
|
||||
},
|
||||
{
|
||||
"epoch": 0.05577600356966423,
|
||||
"grad_norm": 57505.90625,
|
||||
"learning_rate": 2.9458047440115226e-05,
|
||||
"loss": 0.6761,
|
||||
"step": 3500
|
||||
},
|
||||
{
|
||||
"epoch": 0.06374400407961626,
|
||||
"grad_norm": 55419.26171875,
|
||||
"learning_rate": 2.9378325086498752e-05,
|
||||
"loss": 0.6679,
|
||||
"step": 4000
|
||||
},
|
||||
{
|
||||
"epoch": 0.07171200458956829,
|
||||
"grad_norm": 51254.3671875,
|
||||
"learning_rate": 2.929860273288228e-05,
|
||||
"loss": 0.6602,
|
||||
"step": 4500
|
||||
},
|
||||
{
|
||||
"epoch": 0.07968000509952032,
|
||||
"grad_norm": 51469.9765625,
|
||||
"learning_rate": 2.9218880379265813e-05,
|
||||
"loss": 0.6554,
|
||||
"step": 5000
|
||||
},
|
||||
{
|
||||
"epoch": 0.08764800560947236,
|
||||
"grad_norm": 69357.2265625,
|
||||
"learning_rate": 2.913915802564934e-05,
|
||||
"loss": 0.6516,
|
||||
"step": 5500
|
||||
},
|
||||
{
|
||||
"epoch": 0.09561600611942439,
|
||||
"grad_norm": 48255.7890625,
|
||||
"learning_rate": 2.9059435672032868e-05,
|
||||
"loss": 0.6482,
|
||||
"step": 6000
|
||||
},
|
||||
{
|
||||
"epoch": 0.10358400662937642,
|
||||
"grad_norm": 59810.20703125,
|
||||
"learning_rate": 2.8979713318416394e-05,
|
||||
"loss": 0.6397,
|
||||
"step": 6500
|
||||
},
|
||||
{
|
||||
"epoch": 0.11155200713932846,
|
||||
"grad_norm": 54012.7109375,
|
||||
"learning_rate": 2.8899990964799923e-05,
|
||||
"loss": 0.6425,
|
||||
"step": 7000
|
||||
},
|
||||
{
|
||||
"epoch": 0.11952000764928049,
|
||||
"grad_norm": 47228.74609375,
|
||||
"learning_rate": 2.8820268611183452e-05,
|
||||
"loss": 0.6392,
|
||||
"step": 7500
|
||||
},
|
||||
{
|
||||
"epoch": 0.12748800815923253,
|
||||
"grad_norm": 55561.9453125,
|
||||
"learning_rate": 2.874054625756698e-05,
|
||||
"loss": 0.6342,
|
||||
"step": 8000
|
||||
},
|
||||
{
|
||||
"epoch": 0.13545600866918456,
|
||||
"grad_norm": 50830.77734375,
|
||||
"learning_rate": 2.866082390395051e-05,
|
||||
"loss": 0.6288,
|
||||
"step": 8500
|
||||
},
|
||||
{
|
||||
"epoch": 0.14342400917913659,
|
||||
"grad_norm": 45037.15234375,
|
||||
"learning_rate": 2.858110155033404e-05,
|
||||
"loss": 0.6283,
|
||||
"step": 9000
|
||||
},
|
||||
{
|
||||
"epoch": 0.1513920096890886,
|
||||
"grad_norm": 42729.57421875,
|
||||
"learning_rate": 2.8501379196717565e-05,
|
||||
"loss": 0.6252,
|
||||
"step": 9500
|
||||
},
|
||||
{
|
||||
"epoch": 0.15936001019904064,
|
||||
"grad_norm": 46592.18359375,
|
||||
"learning_rate": 2.8421656843101094e-05,
|
||||
"loss": 0.6199,
|
||||
"step": 10000
|
||||
},
|
||||
{
|
||||
"epoch": 0.1673280107089927,
|
||||
"grad_norm": 40794.70703125,
|
||||
"learning_rate": 2.834193448948462e-05,
|
||||
"loss": 0.6237,
|
||||
"step": 10500
|
||||
},
|
||||
{
|
||||
"epoch": 0.17529601121894473,
|
||||
"grad_norm": 40227.96484375,
|
||||
"learning_rate": 2.8262212135868152e-05,
|
||||
"loss": 0.6174,
|
||||
"step": 11000
|
||||
},
|
||||
{
|
||||
"epoch": 0.18326401172889675,
|
||||
"grad_norm": 49994.6015625,
|
||||
"learning_rate": 2.818248978225168e-05,
|
||||
"loss": 0.6151,
|
||||
"step": 11500
|
||||
},
|
||||
{
|
||||
"epoch": 0.19123201223884878,
|
||||
"grad_norm": 55100.75390625,
|
||||
"learning_rate": 2.8102767428635207e-05,
|
||||
"loss": 0.6117,
|
||||
"step": 12000
|
||||
},
|
||||
{
|
||||
"epoch": 0.1992000127488008,
|
||||
"grad_norm": 93011.546875,
|
||||
"learning_rate": 2.8023045075018736e-05,
|
||||
"loss": 0.6122,
|
||||
"step": 12500
|
||||
},
|
||||
{
|
||||
"epoch": 0.20716801325875284,
|
||||
"grad_norm": 47631.09375,
|
||||
"learning_rate": 2.794332272140226e-05,
|
||||
"loss": 0.6133,
|
||||
"step": 13000
|
||||
},
|
||||
{
|
||||
"epoch": 0.2151360137687049,
|
||||
"grad_norm": 42235.15625,
|
||||
"learning_rate": 2.786360036778579e-05,
|
||||
"loss": 0.6038,
|
||||
"step": 13500
|
||||
},
|
||||
{
|
||||
"epoch": 0.22310401427865692,
|
||||
"grad_norm": 41144.3359375,
|
||||
"learning_rate": 2.7783878014169323e-05,
|
||||
"loss": 0.6074,
|
||||
"step": 14000
|
||||
},
|
||||
{
|
||||
"epoch": 0.23107201478860895,
|
||||
"grad_norm": 47137.19921875,
|
||||
"learning_rate": 2.770415566055285e-05,
|
||||
"loss": 0.6047,
|
||||
"step": 14500
|
||||
},
|
||||
{
|
||||
"epoch": 0.23904001529856098,
|
||||
"grad_norm": 37974.65625,
|
||||
"learning_rate": 2.7624433306936378e-05,
|
||||
"loss": 0.6058,
|
||||
"step": 15000
|
||||
},
|
||||
{
|
||||
"epoch": 0.247008015808513,
|
||||
"grad_norm": 49424.44140625,
|
||||
"learning_rate": 2.7544710953319903e-05,
|
||||
"loss": 0.6014,
|
||||
"step": 15500
|
||||
},
|
||||
{
|
||||
"epoch": 0.25497601631846506,
|
||||
"grad_norm": 57363.96875,
|
||||
"learning_rate": 2.7464988599703432e-05,
|
||||
"loss": 0.6012,
|
||||
"step": 16000
|
||||
},
|
||||
{
|
||||
"epoch": 0.26294401682841706,
|
||||
"grad_norm": 48850.90234375,
|
||||
"learning_rate": 2.738526624608696e-05,
|
||||
"loss": 0.5969,
|
||||
"step": 16500
|
||||
},
|
||||
{
|
||||
"epoch": 0.2709120173383691,
|
||||
"grad_norm": 60200.43359375,
|
||||
"learning_rate": 2.730554389247049e-05,
|
||||
"loss": 0.5958,
|
||||
"step": 17000
|
||||
},
|
||||
{
|
||||
"epoch": 0.2788800178483211,
|
||||
"grad_norm": 48039.16015625,
|
||||
"learning_rate": 2.722582153885402e-05,
|
||||
"loss": 0.598,
|
||||
"step": 17500
|
||||
},
|
||||
{
|
||||
"epoch": 0.28684801835827317,
|
||||
"grad_norm": 34431.2265625,
|
||||
"learning_rate": 2.7146099185237545e-05,
|
||||
"loss": 0.5953,
|
||||
"step": 18000
|
||||
},
|
||||
{
|
||||
"epoch": 0.2948160188682252,
|
||||
"grad_norm": 34913.16015625,
|
||||
"learning_rate": 2.7066376831621074e-05,
|
||||
"loss": 0.5932,
|
||||
"step": 18500
|
||||
},
|
||||
{
|
||||
"epoch": 0.3027840193781772,
|
||||
"grad_norm": 36192.234375,
|
||||
"learning_rate": 2.6986654478004603e-05,
|
||||
"loss": 0.5939,
|
||||
"step": 19000
|
||||
},
|
||||
{
|
||||
"epoch": 0.3107520198881293,
|
||||
"grad_norm": 46195.62890625,
|
||||
"learning_rate": 2.690693212438813e-05,
|
||||
"loss": 0.5914,
|
||||
"step": 19500
|
||||
},
|
||||
{
|
||||
"epoch": 0.3187200203980813,
|
||||
"grad_norm": 43815.43359375,
|
||||
"learning_rate": 2.682720977077166e-05,
|
||||
"loss": 0.5904,
|
||||
"step": 20000
|
||||
},
|
||||
{
|
||||
"epoch": 0.32668802090803334,
|
||||
"grad_norm": 35572.34375,
|
||||
"learning_rate": 2.674748741715519e-05,
|
||||
"loss": 0.5865,
|
||||
"step": 20500
|
||||
},
|
||||
{
|
||||
"epoch": 0.3346560214179854,
|
||||
"grad_norm": 36805.02734375,
|
||||
"learning_rate": 2.6667765063538716e-05,
|
||||
"loss": 0.5904,
|
||||
"step": 21000
|
||||
},
|
||||
{
|
||||
"epoch": 0.3426240219279374,
|
||||
"grad_norm": 45271.30078125,
|
||||
"learning_rate": 2.6588042709922245e-05,
|
||||
"loss": 0.5873,
|
||||
"step": 21500
|
||||
},
|
||||
{
|
||||
"epoch": 0.35059202243788945,
|
||||
"grad_norm": 35245.0234375,
|
||||
"learning_rate": 2.650832035630577e-05,
|
||||
"loss": 0.5856,
|
||||
"step": 22000
|
||||
},
|
||||
{
|
||||
"epoch": 0.35856002294784145,
|
||||
"grad_norm": 34578.25,
|
||||
"learning_rate": 2.64285980026893e-05,
|
||||
"loss": 0.5795,
|
||||
"step": 22500
|
||||
},
|
||||
{
|
||||
"epoch": 0.3665280234577935,
|
||||
"grad_norm": 40504.375,
|
||||
"learning_rate": 2.6348875649072832e-05,
|
||||
"loss": 0.5846,
|
||||
"step": 23000
|
||||
},
|
||||
{
|
||||
"epoch": 0.37449602396774556,
|
||||
"grad_norm": 37683.4453125,
|
||||
"learning_rate": 2.6269153295456358e-05,
|
||||
"loss": 0.5792,
|
||||
"step": 23500
|
||||
},
|
||||
{
|
||||
"epoch": 0.38246402447769756,
|
||||
"grad_norm": 35323.41015625,
|
||||
"learning_rate": 2.6189430941839887e-05,
|
||||
"loss": 0.5803,
|
||||
"step": 24000
|
||||
},
|
||||
{
|
||||
"epoch": 0.3904320249876496,
|
||||
"grad_norm": 47546.1953125,
|
||||
"learning_rate": 2.6109708588223413e-05,
|
||||
"loss": 0.5805,
|
||||
"step": 24500
|
||||
},
|
||||
{
|
||||
"epoch": 0.3984000254976016,
|
||||
"grad_norm": 44604.859375,
|
||||
"learning_rate": 2.6029986234606942e-05,
|
||||
"loss": 0.5773,
|
||||
"step": 25000
|
||||
},
|
||||
{
|
||||
"epoch": 0.4063680260075537,
|
||||
"grad_norm": 47108.78125,
|
||||
"learning_rate": 2.595026388099047e-05,
|
||||
"loss": 0.5825,
|
||||
"step": 25500
|
||||
},
|
||||
{
|
||||
"epoch": 0.4143360265175057,
|
||||
"grad_norm": 42788.87109375,
|
||||
"learning_rate": 2.5870541527374e-05,
|
||||
"loss": 0.578,
|
||||
"step": 26000
|
||||
},
|
||||
{
|
||||
"epoch": 0.42230402702745773,
|
||||
"grad_norm": 33550.89453125,
|
||||
"learning_rate": 2.579081917375753e-05,
|
||||
"loss": 0.5779,
|
||||
"step": 26500
|
||||
},
|
||||
{
|
||||
"epoch": 0.4302720275374098,
|
||||
"grad_norm": 36036.8984375,
|
||||
"learning_rate": 2.5711096820141055e-05,
|
||||
"loss": 0.574,
|
||||
"step": 27000
|
||||
},
|
||||
{
|
||||
"epoch": 0.4382400280473618,
|
||||
"grad_norm": 61990.4296875,
|
||||
"learning_rate": 2.5631374466524584e-05,
|
||||
"loss": 0.5751,
|
||||
"step": 27500
|
||||
},
|
||||
{
|
||||
"epoch": 0.44620802855731384,
|
||||
"grad_norm": 35250.4375,
|
||||
"learning_rate": 2.5551652112908113e-05,
|
||||
"loss": 0.5761,
|
||||
"step": 28000
|
||||
},
|
||||
{
|
||||
"epoch": 0.45417602906726584,
|
||||
"grad_norm": 40302.27734375,
|
||||
"learning_rate": 2.547192975929164e-05,
|
||||
"loss": 0.5723,
|
||||
"step": 28500
|
||||
},
|
||||
{
|
||||
"epoch": 0.4621440295772179,
|
||||
"grad_norm": 40248.03515625,
|
||||
"learning_rate": 2.539220740567517e-05,
|
||||
"loss": 0.5754,
|
||||
"step": 29000
|
||||
},
|
||||
{
|
||||
"epoch": 0.47011203008716995,
|
||||
"grad_norm": 38083.6484375,
|
||||
"learning_rate": 2.5312485052058697e-05,
|
||||
"loss": 0.5719,
|
||||
"step": 29500
|
||||
},
|
||||
{
|
||||
"epoch": 0.47808003059712195,
|
||||
"grad_norm": 32291.21484375,
|
||||
"learning_rate": 2.5232762698442226e-05,
|
||||
"loss": 0.5726,
|
||||
"step": 30000
|
||||
},
|
||||
{
|
||||
"epoch": 0.486048031107074,
|
||||
"grad_norm": 39065.84375,
|
||||
"learning_rate": 2.5153040344825755e-05,
|
||||
"loss": 0.5716,
|
||||
"step": 30500
|
||||
},
|
||||
{
|
||||
"epoch": 0.494016031617026,
|
||||
"grad_norm": 46160.44921875,
|
||||
"learning_rate": 2.507331799120928e-05,
|
||||
"loss": 0.5723,
|
||||
"step": 31000
|
||||
},
|
||||
{
|
||||
"epoch": 0.5019840321269781,
|
||||
"grad_norm": 36111.65625,
|
||||
"learning_rate": 2.499359563759281e-05,
|
||||
"loss": 0.569,
|
||||
"step": 31500
|
||||
},
|
||||
{
|
||||
"epoch": 0.5099520326369301,
|
||||
"grad_norm": 33012.55859375,
|
||||
"learning_rate": 2.4913873283976342e-05,
|
||||
"loss": 0.5692,
|
||||
"step": 32000
|
||||
},
|
||||
{
|
||||
"epoch": 0.5179200331468822,
|
||||
"grad_norm": 40300.83203125,
|
||||
"learning_rate": 2.4834150930359868e-05,
|
||||
"loss": 0.5686,
|
||||
"step": 32500
|
||||
},
|
||||
{
|
||||
"epoch": 0.5258880336568341,
|
||||
"grad_norm": 38612.78515625,
|
||||
"learning_rate": 2.4754428576743397e-05,
|
||||
"loss": 0.5663,
|
||||
"step": 33000
|
||||
},
|
||||
{
|
||||
"epoch": 0.5338560341667862,
|
||||
"grad_norm": 52813.0234375,
|
||||
"learning_rate": 2.4674706223126922e-05,
|
||||
"loss": 0.5662,
|
||||
"step": 33500
|
||||
},
|
||||
{
|
||||
"epoch": 0.5418240346767382,
|
||||
"grad_norm": 36266.23046875,
|
||||
"learning_rate": 2.459498386951045e-05,
|
||||
"loss": 0.5635,
|
||||
"step": 34000
|
||||
},
|
||||
{
|
||||
"epoch": 0.5497920351866903,
|
||||
"grad_norm": 38791.453125,
|
||||
"learning_rate": 2.451526151589398e-05,
|
||||
"loss": 0.5677,
|
||||
"step": 34500
|
||||
},
|
||||
{
|
||||
"epoch": 0.5577600356966422,
|
||||
"grad_norm": 43926.4296875,
|
||||
"learning_rate": 2.443553916227751e-05,
|
||||
"loss": 0.5688,
|
||||
"step": 35000
|
||||
},
|
||||
{
|
||||
"epoch": 0.5657280362065943,
|
||||
"grad_norm": 32247.173828125,
|
||||
"learning_rate": 2.435581680866104e-05,
|
||||
"loss": 0.5686,
|
||||
"step": 35500
|
||||
},
|
||||
{
|
||||
"epoch": 0.5736960367165463,
|
||||
"grad_norm": 35230.44140625,
|
||||
"learning_rate": 2.4276094455044564e-05,
|
||||
"loss": 0.566,
|
||||
"step": 36000
|
||||
},
|
||||
{
|
||||
"epoch": 0.5816640372264984,
|
||||
"grad_norm": 33197.58203125,
|
||||
"learning_rate": 2.4196372101428093e-05,
|
||||
"loss": 0.5629,
|
||||
"step": 36500
|
||||
},
|
||||
{
|
||||
"epoch": 0.5896320377364505,
|
||||
"grad_norm": 41267.5390625,
|
||||
"learning_rate": 2.4116649747811623e-05,
|
||||
"loss": 0.5642,
|
||||
"step": 37000
|
||||
},
|
||||
{
|
||||
"epoch": 0.5976000382464024,
|
||||
"grad_norm": 41852.71875,
|
||||
"learning_rate": 2.4036927394195148e-05,
|
||||
"loss": 0.5635,
|
||||
"step": 37500
|
||||
},
|
||||
{
|
||||
"epoch": 0.6055680387563545,
|
||||
"grad_norm": 32820.1015625,
|
||||
"learning_rate": 2.395720504057868e-05,
|
||||
"loss": 0.5639,
|
||||
"step": 38000
|
||||
},
|
||||
{
|
||||
"epoch": 0.6135360392663065,
|
||||
"grad_norm": 39303.70703125,
|
||||
"learning_rate": 2.3877482686962206e-05,
|
||||
"loss": 0.5605,
|
||||
"step": 38500
|
||||
},
|
||||
{
|
||||
"epoch": 0.6215040397762586,
|
||||
"grad_norm": 53008.375,
|
||||
"learning_rate": 2.3797760333345735e-05,
|
||||
"loss": 0.562,
|
||||
"step": 39000
|
||||
},
|
||||
{
|
||||
"epoch": 0.6294720402862106,
|
||||
"grad_norm": 41845.28515625,
|
||||
"learning_rate": 2.3718037979729264e-05,
|
||||
"loss": 0.5641,
|
||||
"step": 39500
|
||||
},
|
||||
{
|
||||
"epoch": 0.6374400407961626,
|
||||
"grad_norm": 49620.7734375,
|
||||
"learning_rate": 2.363831562611279e-05,
|
||||
"loss": 0.5582,
|
||||
"step": 40000
|
||||
},
|
||||
{
|
||||
"epoch": 0.6454080413061146,
|
||||
"grad_norm": 46115.32421875,
|
||||
"learning_rate": 2.355859327249632e-05,
|
||||
"loss": 0.5616,
|
||||
"step": 40500
|
||||
},
|
||||
{
|
||||
"epoch": 0.6533760418160667,
|
||||
"grad_norm": 33357.73828125,
|
||||
"learning_rate": 2.3478870918879848e-05,
|
||||
"loss": 0.5633,
|
||||
"step": 41000
|
||||
},
|
||||
{
|
||||
"epoch": 0.6613440423260187,
|
||||
"grad_norm": 48345.0859375,
|
||||
"learning_rate": 2.3399148565263377e-05,
|
||||
"loss": 0.5588,
|
||||
"step": 41500
|
||||
},
|
||||
{
|
||||
"epoch": 0.6693120428359708,
|
||||
"grad_norm": 46256.62890625,
|
||||
"learning_rate": 2.3319426211646906e-05,
|
||||
"loss": 0.5584,
|
||||
"step": 42000
|
||||
},
|
||||
{
|
||||
"epoch": 0.6772800433459227,
|
||||
"grad_norm": 34897.8828125,
|
||||
"learning_rate": 2.3239703858030432e-05,
|
||||
"loss": 0.5577,
|
||||
"step": 42500
|
||||
},
|
||||
{
|
||||
"epoch": 0.6852480438558748,
|
||||
"grad_norm": 86223.203125,
|
||||
"learning_rate": 2.315998150441396e-05,
|
||||
"loss": 0.5549,
|
||||
"step": 43000
|
||||
},
|
||||
{
|
||||
"epoch": 0.6932160443658268,
|
||||
"grad_norm": 32481.861328125,
|
||||
"learning_rate": 2.308025915079749e-05,
|
||||
"loss": 0.5623,
|
||||
"step": 43500
|
||||
},
|
||||
{
|
||||
"epoch": 0.7011840448757789,
|
||||
"grad_norm": 33890.0859375,
|
||||
"learning_rate": 2.300053679718102e-05,
|
||||
"loss": 0.5618,
|
||||
"step": 44000
|
||||
},
|
||||
{
|
||||
"epoch": 0.709152045385731,
|
||||
"grad_norm": 32936.66015625,
|
||||
"learning_rate": 2.2920814443564548e-05,
|
||||
"loss": 0.5613,
|
||||
"step": 44500
|
||||
},
|
||||
{
|
||||
"epoch": 0.7171200458956829,
|
||||
"grad_norm": 36461.4453125,
|
||||
"learning_rate": 2.2841092089948074e-05,
|
||||
"loss": 0.5581,
|
||||
"step": 45000
|
||||
},
|
||||
{
|
||||
"epoch": 0.725088046405635,
|
||||
"grad_norm": 35596.3046875,
|
||||
"learning_rate": 2.2761369736331603e-05,
|
||||
"loss": 0.5514,
|
||||
"step": 45500
|
||||
},
|
||||
{
|
||||
"epoch": 0.733056046915587,
|
||||
"grad_norm": 46988.7890625,
|
||||
"learning_rate": 2.2681647382715132e-05,
|
||||
"loss": 0.5511,
|
||||
"step": 46000
|
||||
},
|
||||
{
|
||||
"epoch": 0.7410240474255391,
|
||||
"grad_norm": 34446.23046875,
|
||||
"learning_rate": 2.2601925029098658e-05,
|
||||
"loss": 0.556,
|
||||
"step": 46500
|
||||
},
|
||||
{
|
||||
"epoch": 0.7489920479354911,
|
||||
"grad_norm": 30729.033203125,
|
||||
"learning_rate": 2.252220267548219e-05,
|
||||
"loss": 0.5568,
|
||||
"step": 47000
|
||||
},
|
||||
{
|
||||
"epoch": 0.7569600484454431,
|
||||
"grad_norm": 35997.25390625,
|
||||
"learning_rate": 2.2442480321865716e-05,
|
||||
"loss": 0.5538,
|
||||
"step": 47500
|
||||
},
|
||||
{
|
||||
"epoch": 0.7649280489553951,
|
||||
"grad_norm": 36570.2578125,
|
||||
"learning_rate": 2.2362757968249245e-05,
|
||||
"loss": 0.5524,
|
||||
"step": 48000
|
||||
},
|
||||
{
|
||||
"epoch": 0.7728960494653472,
|
||||
"grad_norm": 38073.46484375,
|
||||
"learning_rate": 2.2283035614632774e-05,
|
||||
"loss": 0.5564,
|
||||
"step": 48500
|
||||
},
|
||||
{
|
||||
"epoch": 0.7808640499752992,
|
||||
"grad_norm": 32213.791015625,
|
||||
"learning_rate": 2.22033132610163e-05,
|
||||
"loss": 0.5531,
|
||||
"step": 49000
|
||||
},
|
||||
{
|
||||
"epoch": 0.7888320504852512,
|
||||
"grad_norm": 51021.55078125,
|
||||
"learning_rate": 2.212359090739983e-05,
|
||||
"loss": 0.5551,
|
||||
"step": 49500
|
||||
},
|
||||
{
|
||||
"epoch": 0.7968000509952032,
|
||||
"grad_norm": 34028.9609375,
|
||||
"learning_rate": 2.2043868553783358e-05,
|
||||
"loss": 0.5522,
|
||||
"step": 50000
|
||||
},
|
||||
{
|
||||
"epoch": 0.8047680515051553,
|
||||
"grad_norm": 35102.0390625,
|
||||
"learning_rate": 2.1964146200166887e-05,
|
||||
"loss": 0.551,
|
||||
"step": 50500
|
||||
},
|
||||
{
|
||||
"epoch": 0.8127360520151073,
|
||||
"grad_norm": 38054.03125,
|
||||
"learning_rate": 2.1884423846550416e-05,
|
||||
"loss": 0.5564,
|
||||
"step": 51000
|
||||
},
|
||||
{
|
||||
"epoch": 0.8207040525250594,
|
||||
"grad_norm": 34621.86328125,
|
||||
"learning_rate": 2.180470149293394e-05,
|
||||
"loss": 0.5505,
|
||||
"step": 51500
|
||||
},
|
||||
{
|
||||
"epoch": 0.8286720530350113,
|
||||
"grad_norm": 34099.5859375,
|
||||
"learning_rate": 2.172497913931747e-05,
|
||||
"loss": 0.5504,
|
||||
"step": 52000
|
||||
},
|
||||
{
|
||||
"epoch": 0.8366400535449634,
|
||||
"grad_norm": 36866.79296875,
|
||||
"learning_rate": 2.1645256785701e-05,
|
||||
"loss": 0.5473,
|
||||
"step": 52500
|
||||
},
|
||||
{
|
||||
"epoch": 0.8446080540549155,
|
||||
"grad_norm": 38420.94140625,
|
||||
"learning_rate": 2.156553443208453e-05,
|
||||
"loss": 0.547,
|
||||
"step": 53000
|
||||
},
|
||||
{
|
||||
"epoch": 0.8525760545648675,
|
||||
"grad_norm": 35430.8984375,
|
||||
"learning_rate": 2.1485812078468058e-05,
|
||||
"loss": 0.5504,
|
||||
"step": 53500
|
||||
},
|
||||
{
|
||||
"epoch": 0.8605440550748196,
|
||||
"grad_norm": 35433.71875,
|
||||
"learning_rate": 2.1406089724851584e-05,
|
||||
"loss": 0.5547,
|
||||
"step": 54000
|
||||
},
|
||||
{
|
||||
"epoch": 0.8685120555847715,
|
||||
"grad_norm": 41701.05859375,
|
||||
"learning_rate": 2.1326367371235113e-05,
|
||||
"loss": 0.5517,
|
||||
"step": 54500
|
||||
},
|
||||
{
|
||||
"epoch": 0.8764800560947236,
|
||||
"grad_norm": 34275.7265625,
|
||||
"learning_rate": 2.124664501761864e-05,
|
||||
"loss": 0.5496,
|
||||
"step": 55000
|
||||
}
|
||||
],
|
||||
"logging_steps": 500,
|
||||
"max_steps": 188253,
|
||||
"num_input_tokens_seen": 0,
|
||||
"num_train_epochs": 3,
|
||||
"save_steps": 1000,
|
||||
"stateful_callbacks": {
|
||||
"TrainerControl": {
|
||||
"args": {
|
||||
"should_epoch_stop": false,
|
||||
"should_evaluate": false,
|
||||
"should_log": false,
|
||||
"should_save": true,
|
||||
"should_training_stop": false
|
||||
},
|
||||
"attributes": {}
|
||||
}
|
||||
},
|
||||
"total_flos": 1.1496849408e+17,
|
||||
"train_batch_size": 16,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
3
last-checkpoint/training_args.bin
Normal file
3
last-checkpoint/training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c2e7fcb26e3e3848d243625e49f36d2273fbe5c83ece34d46111c33604627a5b
|
||||
size 5368
|
||||
50259
last-checkpoint/vocab.json
Normal file
50259
last-checkpoint/vocab.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user