From c1cab8bfece1b2f1ebfe1b331b7982bf76903d69 Mon Sep 17 00:00:00 2001
From: ModelHub XC
Date: Thu, 23 Apr 2026 23:37:08 +0800
Subject: [PATCH] Initialize project; model provided by the ModelHub XC community
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Model: jackf857/llama-3-8b-base-kto-ultrafeedback-8xh200
Source: Original Platform
---
 .gitattributes | 36 +
 README.md | 78 ++
 all_results.json | 22 +
 config.json | 29 +
 eval_results.json | 16 +
 generation_config.json | 9 +
 model-00001-of-00007.safetensors | 3 +
 model-00002-of-00007.safetensors | 3 +
 model-00003-of-00007.safetensors | 3 +
 model-00004-of-00007.safetensors | 3 +
 model-00005-of-00007.safetensors | 3 +
 model-00006-of-00007.safetensors | 3 +
 model-00007-of-00007.safetensors | 3 +
 model.safetensors.index.json | 298 +++++
 special_tokens_map.json | 23 +
 tokenizer.json | 3 +
 tokenizer_config.json | 2064 ++++++++++++++++++++++++++++++
 train_results.json | 9 +
 trainer_state.json | 1547 ++++++++++++++++++++++
 19 files changed, 4155 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 README.md
 create mode 100644 all_results.json
 create mode 100644 config.json
 create mode 100644 eval_results.json
 create mode 100644 generation_config.json
 create mode 100644 model-00001-of-00007.safetensors
 create mode 100644 model-00002-of-00007.safetensors
 create mode 100644 model-00003-of-00007.safetensors
 create mode 100644 model-00004-of-00007.safetensors
 create mode 100644 model-00005-of-00007.safetensors
 create mode 100644 model-00006-of-00007.safetensors
 create mode 100644 model-00007-of-00007.safetensors
 create mode 100644 model.safetensors.index.json
 create mode 100644 special_tokens_map.json
 create mode 100644 tokenizer.json
 create mode 100644 tokenizer_config.json
 create mode 100644 train_results.json
 create mode 100644 trainer_state.json

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..52373fe
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,36 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..25d676d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,78 @@
+---
+library_name: transformers
+base_model: W-61/llama-3-8b-base-sft-ultrachat-8xh200
+tags:
+- alignment-handbook
+- kto
+- generated_from_trainer
+datasets:
+- HuggingFaceH4/ultrafeedback_binarized
+model-index:
+- name: llama-3-8b-base-kto-ultrafeedback-8xh200
+  results: []
+---
+
+
+
+# llama-3-8b-base-kto-ultrafeedback-8xh200
+
+This model is a fine-tuned version of [W-61/llama-3-8b-base-sft-ultrachat-8xh200](https://huggingface.co/W-61/llama-3-8b-base-sft-ultrachat-8xh200) on the HuggingFaceH4/ultrafeedback_binarized dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.3658
+- Rewards/chosen: 0.1622
+- Logps/chosen: -286.2337
+- Rewards/rejected: -2.5444
+- Logps/rejected: -292.3963
+- Rewards/margins: 2.7066
+- Kl: 0.0
+- Logits/chosen: -140467840.0
+- Logits/rejected: -139209600.0
+
+## Model description
+
+More information needed
+
+## Intended uses & limitations
+
+More information needed
+
+## Training and evaluation data
+
+More information needed
+
+## Training procedure
+
+### Training hyperparameters
+
+The following hyperparameters were used during training:
+- learning_rate: 5e-07
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 8
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 128
+- total_eval_batch_size: 32
+- optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 1
+
+### Training results
+
+| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Logps/chosen | Rewards/rejected | Logps/rejected | Rewards/margins | Kl | Logits/chosen | Logits/rejected |
+|:-------------:|:------:|:----:|:---------------:|:--------------:|:------------:|:----------------:|:--------------:|:---------------:|:---:|:-------------:|:---------------:|
+| 1.5841 | 0.2094 | 200 | 0.3971 | -0.1699 | -289.5548 | -1.7146 | -284.0978 | 1.5447 | 0.0 | -151004736.0 | -149476768.0 |
+| 1.404 | 0.4188 | 400 | 0.3773 | -0.0342 | -288.1983 | -2.3874 | -290.8255 | 2.3531 | 0.0 | -143785152.0 | -142386976.0 |
+| 1.4253 | 0.6283 | 600 | 0.3684 | -0.3211 | -291.0670 | -3.1407 | -298.3589 | 2.8196 | 0.0 | -145117536.0 | -143700400.0 |
+| 1.4432 | 0.8377 | 800 | 0.3658 | 0.1622 | -286.2337 | -2.5444 | -292.3963 | 2.7066 | 0.0 | -140467840.0 | -139209600.0 |
+
+
+### Framework versions
+
+- Transformers 4.51.0
+- Pytorch 2.3.1+cu121
+- Datasets 2.21.0
+- Tokenizers 0.21.4
diff --git a/all_results.json b/all_results.json
new file mode 100644
index 0000000..a50c634
--- /dev/null
+++ b/all_results.json
@@ -0,0 +1,22 @@
+{
+    "epoch": 1.0,
+    "eval_kl": 0.0,
+    "eval_logits/chosen": -141123088.0,
+    "eval_logits/rejected": -139890512.0,
+    "eval_logps/chosen": -287.08896875,
+    "eval_logps/rejected": -293.79315625,
+    "eval_loss": 0.3648396134376526,
+    "eval_rewards/chosen": 0.0766905059814453,
+    "eval_rewards/margins": 2.760806716918945,
+    "eval_rewards/rejected": -2.6841162109375,
+    "eval_runtime": 92.6279,
+    "eval_samples": 4000,
+    "eval_samples_per_second": 43.184,
+    "eval_steps_per_second": 1.349,
+    "total_flos": 0.0,
+    "train_loss": 1.5426912418834826,
+
"train_runtime": 5367.6535, + "train_samples": 122270, + "train_samples_per_second": 22.779, + "train_steps_per_second": 0.178 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5092b09 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": 128001, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "float32", + "transformers_version": "4.51.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..aa7c483 --- /dev/null +++ b/eval_results.json @@ -0,0 +1,16 @@ +{ + "epoch": 1.0, + "eval_kl": 0.0, + "eval_logits/chosen": -141123088.0, + "eval_logits/rejected": -139890512.0, + "eval_logps/chosen": -287.08896875, + "eval_logps/rejected": -293.79315625, + "eval_loss": 0.3648396134376526, + "eval_rewards/chosen": 0.0766905059814453, + "eval_rewards/margins": 2.760806716918945, + "eval_rewards/rejected": -2.6841162109375, + "eval_runtime": 92.6279, + "eval_samples": 4000, + "eval_samples_per_second": 43.184, + "eval_steps_per_second": 1.349 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..76247c9 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,9 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": 128001, + "max_length": 4096, + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.0" +} diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..5e8a850 --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d2fdb47ac842bc3425fc650f559c505a22b335713b24f9c5b71e766fb9b8921 +size 4886466168 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..9cc518a --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83386b9b90654cb81d9234e2939f3cd066264129c9498292436aca7324fbc4cc +size 4832007448 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..b18cbe0 --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:19b8f7fec091f7da3441864f654bbbc6ed2108124a2340428385d49b5ab75ecb +size 4999813112 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..7522500 --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2194e37452b3009e9e3833bbea3606fab03d1fe852818230503e341c759eccc6 +size 4999813128 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..ab4662f --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:61837db0019dfcc15f59b1efcc0e02f41d55a28e36c8033a695a77b07de41c8e +size 4832007496 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..7d925ee --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f99ed497735e7603a18a907b945af2e66f15714a50543fc1b551081cdced4677 +size 4999813120 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..d60af77 --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:29168143cb76b55c4ff0322e8bb0cdc8327dc1aa3e4f8b3472fa7f26c20f8c23 +size 2571158184 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0985084 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 32121044992 + }, + "weight_map": { + "lm_head.weight": "model-00007-of-00007.safetensors", + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + 
"model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + 
"model.layers.16.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00005-of-00007.safetensors", + 
"model.layers.20.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + 
"model.layers.25.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + 
"model.layers.29.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + 
"model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..e5b39b6 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + 
"content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..86a3394 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5cf44023714fb39b05e71e425f8d7b92805ff73f7988b083b8c87f0bf87393 +size 17209961 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..8c6916a --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + 
"content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": 
"<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": 
"<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": 
"<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": 
"<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": 
"<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": 
"<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": 
"<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": 
"<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": 
"<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": 
"<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": 
"<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}", + "clean_up_tokenization_spaces": true, + "eos_token": "<|end_of_text|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 2048, + "pad_token": "<|end_of_text|>", + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..2d65f4f --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 1.0, + "total_flos": 0.0, + "train_loss": 1.5426912418834826, + "train_runtime": 5367.6535, + "train_samples": 122270, + "train_samples_per_second": 22.779, + "train_steps_per_second": 0.178 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..e555ce4 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,1547 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 200, + "global_step": 955, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010471204188481676, + "grad_norm": 45.70638656616211, + "kl": 0.03365863859653473, + "learning_rate": 0.0, + "logits/chosen": -133350016.0, + "logits/rejected": -100751848.0, + "logps/chosen": -199.38916015625, + "logps/rejected": -248.57103704637098, + "loss": 1.9996, + "rewards/chosen": -0.006603976993849783, + "rewards/margins": 0.002509254980413562, + "rewards/rejected": -0.009113231974263345, + "step": 1 + }, + { + "epoch": 0.010471204188481676, + "grad_norm": 45.63622283935547, + "kl": 0.05001102015376091, + "learning_rate": 4.6875e-08, + "logits/chosen": -114836760.0, + "logits/rejected": -115496552.0, + "logps/chosen": -280.943407960199, + "logps/rejected": -255.2594489981785, + "loss": 2.0019, + "rewards/chosen": -0.001783605436385172, + "rewards/margins": -0.0038228675079356074, + "rewards/rejected": 
0.0020392620715504353, + "step": 10 + }, + { + "epoch": 0.020942408376963352, + "grad_norm": 49.609336853027344, + "kl": 0.050704918801784515, + "learning_rate": 9.895833333333332e-08, + "logits/chosen": -125602032.0, + "logits/rejected": -116276880.0, + "logps/chosen": -279.09286115269464, + "logps/rejected": -261.840047998366, + "loss": 1.9992, + "rewards/chosen": 0.0030849476060467564, + "rewards/margins": 0.0017726313628812977, + "rewards/rejected": 0.0013123162431654587, + "step": 20 + }, + { + "epoch": 0.031413612565445025, + "grad_norm": 45.7076301574707, + "kl": 0.0796850323677063, + "learning_rate": 1.5104166666666664e-07, + "logits/chosen": -117311640.0, + "logits/rejected": -114144304.0, + "logps/chosen": -294.7149727852853, + "logps/rejected": -242.79242162052117, + "loss": 1.9982, + "rewards/chosen": 0.009787982648557372, + "rewards/margins": 0.002934079104880057, + "rewards/rejected": 0.006853903543677315, + "step": 30 + }, + { + "epoch": 0.041884816753926704, + "grad_norm": 53.927669525146484, + "kl": 0.20030224323272705, + "learning_rate": 2.03125e-07, + "logits/chosen": -119643952.0, + "logits/rejected": -120977776.0, + "logps/chosen": -306.0683379120879, + "logps/rejected": -278.4179383748056, + "loss": 1.9963, + "rewards/chosen": 0.03329542031280661, + "rewards/margins": 0.007960547439639757, + "rewards/rejected": 0.025334872873166856, + "step": 40 + }, + { + "epoch": 0.05235602094240838, + "grad_norm": 51.90446090698242, + "kl": 0.34236329793930054, + "learning_rate": 2.552083333333333e-07, + "logits/chosen": -119124032.0, + "logits/rejected": -115569560.0, + "logps/chosen": -311.1760096153846, + "logps/rejected": -268.8574900793651, + "loss": 1.9864, + "rewards/chosen": 0.07745902428260217, + "rewards/margins": 0.02584011520367111, + "rewards/rejected": 0.051618909078931054, + "step": 50 + }, + { + "epoch": 0.06282722513089005, + "grad_norm": 51.83867263793945, + "kl": 0.503300666809082, + "learning_rate": 3.0729166666666665e-07, + "logits/chosen": -114966488.0, + "logits/rejected": -120589360.0, + "logps/chosen": -299.37085962145113, + "logps/rejected": -272.62553212074306, + "loss": 1.9757, + "rewards/chosen": 0.12960234199788667, + "rewards/margins": 0.050474802944834835, + "rewards/rejected": 0.07912753905305184, + "step": 60 + }, + { + "epoch": 0.07329842931937172, + "grad_norm": 55.578922271728516, + "kl": 0.31499481201171875, + "learning_rate": 3.59375e-07, + "logits/chosen": -111468848.0, + "logits/rejected": -121516712.0, + "logps/chosen": -281.68463625401927, + "logps/rejected": -274.1886635638298, + "loss": 1.9691, + "rewards/chosen": 0.17197007304985806, + "rewards/margins": 0.07172654231262758, + "rewards/rejected": 0.10024353073723048, + "step": 70 + }, + { + "epoch": 0.08376963350785341, + "grad_norm": 53.44794464111328, + "kl": 0.14020584523677826, + "learning_rate": 4.114583333333333e-07, + "logits/chosen": -124242176.0, + "logits/rejected": -114853336.0, + "logps/chosen": -309.7781393568147, + "logps/rejected": -254.96207137161085, + "loss": 1.919, + "rewards/chosen": 0.2425672171857054, + "rewards/margins": 0.16257205424617438, + "rewards/rejected": 0.079995162939531, + "step": 80 + }, + { + "epoch": 0.09424083769633508, + "grad_norm": 45.9337043762207, + "kl": 0.010224603116512299, + "learning_rate": 4.6354166666666664e-07, + "logits/chosen": -114247248.0, + "logits/rejected": -121002304.0, + "logps/chosen": -255.63410433070865, + "logps/rejected": -255.94968507751938, + "loss": 1.9103, + "rewards/chosen": 0.2659489669199065, + "rewards/margins": 
0.1896964983080636, + "rewards/rejected": 0.0762524686118429, + "step": 90 + }, + { + "epoch": 0.10471204188481675, + "grad_norm": 47.694000244140625, + "kl": 0.0, + "learning_rate": 4.999849525959245e-07, + "logits/chosen": -116696832.0, + "logits/rejected": -135091520.0, + "logps/chosen": -299.64565284653463, + "logps/rejected": -257.15984328635017, + "loss": 1.8452, + "rewards/chosen": 0.3131070278658725, + "rewards/margins": 0.35410608100594115, + "rewards/rejected": -0.04099905314006862, + "step": 100 + }, + { + "epoch": 0.11518324607329843, + "grad_norm": 45.92950439453125, + "kl": 0.0, + "learning_rate": 4.997174935782199e-07, + "logits/chosen": -114301056.0, + "logits/rejected": -125180608.0, + "logps/chosen": -289.91893468118195, + "logps/rejected": -249.99958300627944, + "loss": 1.8212, + "rewards/chosen": 0.0881744610206521, + "rewards/margins": 0.3989718005393923, + "rewards/rejected": -0.31079733951874017, + "step": 110 + }, + { + "epoch": 0.1256544502617801, + "grad_norm": 44.388973236083984, + "kl": 0.0, + "learning_rate": 4.9911605954668e-07, + "logits/chosen": -128287936.0, + "logits/rejected": -123292968.0, + "logps/chosen": -274.05433947772656, + "logps/rejected": -291.2209608505564, + "loss": 1.7655, + "rewards/chosen": 0.06379634663805983, + "rewards/margins": 0.5666303574066409, + "rewards/rejected": -0.502834010768581, + "step": 120 + }, + { + "epoch": 0.13612565445026178, + "grad_norm": 43.125389099121094, + "kl": 0.0, + "learning_rate": 4.981814548660135e-07, + "logits/chosen": -118625360.0, + "logits/rejected": -142319376.0, + "logps/chosen": -286.44763163349916, + "logps/rejected": -260.8665758862629, + "loss": 1.688, + "rewards/chosen": 0.16187965810595462, + "rewards/margins": 0.7612699556618077, + "rewards/rejected": -0.599390297555853, + "step": 130 + }, + { + "epoch": 0.14659685863874344, + "grad_norm": 43.5549201965332, + "kl": 0.0, + "learning_rate": 4.969149294871417e-07, + "logits/chosen": -134174704.0, + "logits/rejected": -129014448.0, + "logps/chosen": -270.30858126996804, + "logps/rejected": -286.30975248470946, + "loss": 1.6923, + "rewards/chosen": -0.13654317642553165, + "rewards/margins": 0.839808220327622, + "rewards/rejected": -0.9763513967531536, + "step": 140 + }, + { + "epoch": 0.15706806282722513, + "grad_norm": 38.105342864990234, + "kl": 0.0, + "learning_rate": 4.953181772754997e-07, + "logits/chosen": -140920352.0, + "logits/rejected": -132664600.0, + "logps/chosen": -272.86307251908397, + "logps/rejected": -266.89385, + "loss": 1.6369, + "rewards/chosen": -0.0188656086230096, + "rewards/margins": 1.1427980632519903, + "rewards/rejected": -1.161663671875, + "step": 150 + }, + { + "epoch": 0.16753926701570682, + "grad_norm": 38.238868713378906, + "kl": 0.0, + "learning_rate": 4.93393333745642e-07, + "logits/chosen": -130763216.0, + "logits/rejected": -132181120.0, + "logps/chosen": -267.7964184253247, + "logps/rejected": -265.75249435240966, + "loss": 1.6156, + "rewards/chosen": 0.03252483962418197, + "rewards/margins": 1.1381634585530223, + "rewards/rejected": -1.1056386189288403, + "step": 160 + }, + { + "epoch": 0.17801047120418848, + "grad_norm": 149.7418212890625, + "kl": 0.0, + "learning_rate": 4.9114297320518e-07, + "logits/chosen": -148455008.0, + "logits/rejected": -148213216.0, + "logps/chosen": -292.17505877742946, + "logps/rejected": -289.26684190031153, + "loss": 1.6163, + "rewards/chosen": -0.49163005195067594, + "rewards/margins": 1.4294290544185801, + "rewards/rejected": -1.9210591063692561, + "step": 170 + }, + { + 
"epoch": 0.18848167539267016, + "grad_norm": 43.059410095214844, + "kl": 0.0, + "learning_rate": 4.885701053118751e-07, + "logits/chosen": -147940400.0, + "logits/rejected": -144332512.0, + "logps/chosen": -282.0602057573416, + "logps/rejected": -282.34577409162716, + "loss": 1.5876, + "rewards/chosen": 0.034519776310397446, + "rewards/margins": 1.3361885912602889, + "rewards/rejected": -1.3016688149498914, + "step": 180 + }, + { + "epoch": 0.19895287958115182, + "grad_norm": 40.49046325683594, + "kl": 0.0, + "learning_rate": 4.856781710484872e-07, + "logits/chosen": -139495344.0, + "logits/rejected": -144202096.0, + "logps/chosen": -280.0632974481659, + "logps/rejected": -290.6424196018377, + "loss": 1.5551, + "rewards/chosen": 0.12813351523172722, + "rewards/margins": 1.4480502681389824, + "rewards/rejected": -1.319916752907255, + "step": 190 + }, + { + "epoch": 0.2094240837696335, + "grad_norm": 40.46213912963867, + "kl": 0.0, + "learning_rate": 4.824710381207655e-07, + "logits/chosen": -144665424.0, + "logits/rejected": -151805984.0, + "logps/chosen": -291.33322447749197, + "logps/rejected": -285.26674107142856, + "loss": 1.5841, + "rewards/chosen": 0.006562507420874102, + "rewards/margins": 1.4862394700443544, + "rewards/rejected": -1.4796769626234803, + "step": 200 + }, + { + "epoch": 0.2094240837696335, + "eval_kl": 0.0, + "eval_logits/chosen": -151004736.0, + "eval_logits/rejected": -149476768.0, + "eval_logps/chosen": -289.55475, + "eval_logps/rejected": -284.09784375, + "eval_loss": 0.39710375666618347, + "eval_rewards/chosen": -0.16988844299316405, + "eval_rewards/margins": 1.544695541381836, + "eval_rewards/rejected": -1.714583984375, + "eval_runtime": 92.5994, + "eval_samples_per_second": 43.197, + "eval_steps_per_second": 1.35, + "step": 200 + }, + { + "epoch": 0.2198952879581152, + "grad_norm": 36.308265686035156, + "kl": 0.0, + "learning_rate": 4.789529957847353e-07, + "logits/chosen": -152687040.0, + "logits/rejected": -144599248.0, + "logps/chosen": -300.46617366412215, + "logps/rejected": -276.6361, + "loss": 1.5542, + "rewards/chosen": -0.020091176215018935, + "rewards/margins": 1.684735972222481, + "rewards/rejected": -1.7048271484375, + "step": 210 + }, + { + "epoch": 0.23036649214659685, + "grad_norm": 42.04066467285156, + "kl": 0.0, + "learning_rate": 4.751287491101977e-07, + "logits/chosen": -153300528.0, + "logits/rejected": -141240576.0, + "logps/chosen": -284.2961228649068, + "logps/rejected": -269.83765723270443, + "loss": 1.626, + "rewards/chosen": -0.18326281908876407, + "rewards/margins": 1.4757443193374349, + "rewards/rejected": -1.6590071384261988, + "step": 220 + }, + { + "epoch": 0.24083769633507854, + "grad_norm": 36.196807861328125, + "kl": 0.0, + "learning_rate": 4.710034126881159e-07, + "logits/chosen": -159471936.0, + "logits/rejected": -135821408.0, + "logps/chosen": -301.3033342430859, + "logps/rejected": -293.3502055227656, + "loss": 1.5672, + "rewards/chosen": 0.1034569663945938, + "rewards/margins": 1.6865190209305867, + "rewards/rejected": -1.5830620545359928, + "step": 230 + }, + { + "epoch": 0.2513089005235602, + "grad_norm": 36.31395721435547, + "kl": 0.0, + "learning_rate": 4.665825037903035e-07, + "logits/chosen": -151562448.0, + "logits/rejected": -142325232.0, + "logps/chosen": -277.93509244992293, + "logps/rejected": -272.8287688193344, + "loss": 1.5525, + "rewards/chosen": 0.3289636691288882, + "rewards/margins": 1.6360129685110691, + "rewards/rejected": -1.307049299382181, + "step": 240 + }, + { + "epoch": 0.2617801047120419, + 
"grad_norm": 39.01026916503906, + "kl": 0.0, + "learning_rate": 4.618719349905619e-07, + "logits/chosen": -156142944.0, + "logits/rejected": -141863280.0, + "logps/chosen": -294.4734971374046, + "logps/rejected": -273.881775, + "loss": 1.525, + "rewards/chosen": 0.3023297957791627, + "rewards/margins": 1.7884680770291626, + "rewards/rejected": -1.48613828125, + "step": 250 + }, + { + "epoch": 0.27225130890052357, + "grad_norm": 40.547950744628906, + "kl": 0.0, + "learning_rate": 4.568780062571374e-07, + "logits/chosen": -152771744.0, + "logits/rejected": -151975136.0, + "logps/chosen": -278.8975861378205, + "logps/rejected": -288.9916634908537, + "loss": 1.5231, + "rewards/chosen": 0.07649397850036621, + "rewards/margins": 1.9374570090596268, + "rewards/rejected": -1.8609630305592606, + "step": 260 + }, + { + "epoch": 0.28272251308900526, + "grad_norm": 38.78754806518555, + "kl": 0.0, + "learning_rate": 4.516073965270717e-07, + "logits/chosen": -147246848.0, + "logits/rejected": -140734832.0, + "logps/chosen": -275.6838321596244, + "logps/rejected": -295.4493467238689, + "loss": 1.5248, + "rewards/chosen": 0.03273308743520149, + "rewards/margins": 1.9670623063070813, + "rewards/rejected": -1.93432921887188, + "step": 270 + }, + { + "epoch": 0.2931937172774869, + "grad_norm": 38.30440902709961, + "kl": 0.0, + "learning_rate": 4.460671547737158e-07, + "logits/chosen": -139242080.0, + "logits/rejected": -144891984.0, + "logps/chosen": -307.5467996382637, + "logps/rejected": -275.4886018237082, + "loss": 1.5128, + "rewards/chosen": -0.2156752313448302, + "rewards/margins": 1.9168140977062718, + "rewards/rejected": -2.132489329051102, + "step": 280 + }, + { + "epoch": 0.3036649214659686, + "grad_norm": 52.507747650146484, + "kl": 0.0, + "learning_rate": 4.40264690579353e-07, + "logits/chosen": -153254432.0, + "logits/rejected": -148096032.0, + "logps/chosen": -296.4355230564024, + "logps/rejected": -277.0494791666667, + "loss": 1.5223, + "rewards/chosen": -0.014792419061428162, + "rewards/margins": 2.2441466881976866, + "rewards/rejected": -2.2589391072591147, + "step": 290 + }, + { + "epoch": 0.31413612565445026, + "grad_norm": 42.4326286315918, + "kl": 0.0, + "learning_rate": 4.3420776422553916e-07, + "logits/chosen": -146678224.0, + "logits/rejected": -144266464.0, + "logps/chosen": -289.21781823394497, + "logps/rejected": -280.0624001597444, + "loss": 1.5308, + "rewards/chosen": 0.00835518734899865, + "rewards/margins": 2.1249945409457442, + "rewards/rejected": -2.1166393535967454, + "step": 300 + }, + { + "epoch": 0.32460732984293195, + "grad_norm": 48.941036224365234, + "kl": 0.0, + "learning_rate": 4.279044763144141e-07, + "logits/chosen": -139984352.0, + "logits/rejected": -148744560.0, + "logps/chosen": -271.40555111821084, + "logps/rejected": -308.63379204892965, + "loss": 1.5119, + "rewards/chosen": -0.12764440176966854, + "rewards/margins": 2.093687366674521, + "rewards/rejected": -2.22133176844419, + "step": 310 + }, + { + "epoch": 0.33507853403141363, + "grad_norm": 30.565563201904297, + "kl": 0.0, + "learning_rate": 4.213632569348639e-07, + "logits/chosen": -160167840.0, + "logits/rejected": -138572496.0, + "logps/chosen": -290.84521412884334, + "logps/rejected": -290.7674309045226, + "loss": 1.5273, + "rewards/chosen": -0.013026183032012091, + "rewards/margins": 2.366796105493951, + "rewards/rejected": -2.379822288525963, + "step": 320 + }, + { + "epoch": 0.34554973821989526, + "grad_norm": 37.133567810058594, + "kl": 0.0, + "learning_rate": 4.145928543880249e-07, + 
"logits/chosen": -143369248.0, + "logits/rejected": -145142368.0, + "logps/chosen": -288.8722847551343, + "logps/rejected": -284.5384949768161, + "loss": 1.4706, + "rewards/chosen": 0.3644241646379468, + "rewards/margins": 2.3581714100373867, + "rewards/rejected": -1.9937472453994398, + "step": 330 + }, + { + "epoch": 0.35602094240837695, + "grad_norm": 42.86240005493164, + "kl": 0.0, + "learning_rate": 4.076023234872057e-07, + "logits/chosen": -134226816.0, + "logits/rejected": -150647472.0, + "logps/chosen": -290.2114297253635, + "logps/rejected": -284.6018816187595, + "loss": 1.4998, + "rewards/chosen": 0.00031860425514627927, + "rewards/margins": 2.320413741535309, + "rewards/rejected": -2.3200951372801626, + "step": 340 + }, + { + "epoch": 0.36649214659685864, + "grad_norm": 43.323272705078125, + "kl": 0.0, + "learning_rate": 4.004010134478771e-07, + "logits/chosen": -153940128.0, + "logits/rejected": -142368416.0, + "logps/chosen": -277.28628954475306, + "logps/rejected": -281.2114319620253, + "loss": 1.4997, + "rewards/chosen": 0.20580647315508055, + "rewards/margins": 2.183485568845844, + "rewards/rejected": -1.9776790956907635, + "step": 350 + }, + { + "epoch": 0.3769633507853403, + "grad_norm": 46.848731994628906, + "kl": 0.0, + "learning_rate": 3.9299855538392534e-07, + "logits/chosen": -146128720.0, + "logits/rejected": -144031184.0, + "logps/chosen": -288.1243632445141, + "logps/rejected": -282.6408197040498, + "loss": 1.4687, + "rewards/chosen": 0.38341842699200385, + "rewards/margins": 2.226742892164317, + "rewards/rejected": -1.843324465172313, + "step": 360 + }, + { + "epoch": 0.387434554973822, + "grad_norm": 41.83399963378906, + "kl": 0.0, + "learning_rate": 3.8540484942689075e-07, + "logits/chosen": -145012880.0, + "logits/rejected": -147854064.0, + "logps/chosen": -280.8901771496815, + "logps/rejected": -297.4892398389571, + "loss": 1.4964, + "rewards/chosen": 0.17396898937832778, + "rewards/margins": 2.185319656196771, + "rewards/rejected": -2.0113506668184433, + "step": 370 + }, + { + "epoch": 0.39790575916230364, + "grad_norm": 41.95068359375, + "kl": 0.0, + "learning_rate": 3.77630051485419e-07, + "logits/chosen": -158335904.0, + "logits/rejected": -135963312.0, + "logps/chosen": -299.00375645280235, + "logps/rejected": -290.30250726744185, + "loss": 1.5113, + "rewards/chosen": -0.136355295997102, + "rewards/margins": 2.5501342781437826, + "rewards/rejected": -2.6864895741408845, + "step": 380 + }, + { + "epoch": 0.4083769633507853, + "grad_norm": 52.55752944946289, + "kl": 0.0, + "learning_rate": 3.696845596626342e-07, + "logits/chosen": -136675232.0, + "logits/rejected": -139554592.0, + "logps/chosen": -267.1172371031746, + "logps/rejected": -287.2497836538462, + "loss": 1.4728, + "rewards/chosen": 0.29143521747891865, + "rewards/margins": 2.468798123127957, + "rewards/rejected": -2.1773629056490384, + "step": 390 + }, + { + "epoch": 0.418848167539267, + "grad_norm": 42.524559020996094, + "kl": 0.0, + "learning_rate": 3.61579000349597e-07, + "logits/chosen": -145115344.0, + "logits/rejected": -138730784.0, + "logps/chosen": -291.6719227828746, + "logps/rejected": -286.7621056309904, + "loss": 1.404, + "rewards/chosen": 0.3901278061239727, + "rewards/margins": 2.5747456987457777, + "rewards/rejected": -2.184617892621805, + "step": 400 + }, + { + "epoch": 0.418848167539267, + "eval_kl": 0.0, + "eval_logits/chosen": -143785152.0, + "eval_logits/rejected": -142386976.0, + "eval_logps/chosen": -288.1983125, + "eval_logps/rejected": -290.82553125, + "eval_loss": 
0.3773096799850464, + "eval_rewards/chosen": -0.03424349975585937, + "eval_rewards/margins": 2.3531070861816406, + "eval_rewards/rejected": -2.3873505859375, + "eval_runtime": 92.585, + "eval_samples_per_second": 43.204, + "eval_steps_per_second": 1.35, + "step": 400 + }, + { + "epoch": 0.4293193717277487, + "grad_norm": 42.05149459838867, + "kl": 0.0, + "learning_rate": 3.5332421401344837e-07, + "logits/chosen": -123967896.0, + "logits/rejected": -151420832.0, + "logps/chosen": -290.8375683922559, + "logps/rejected": -284.5394497084548, + "loss": 1.5018, + "rewards/chosen": -0.1862513866488781, + "rewards/margins": 2.4929647368820076, + "rewards/rejected": -2.679216123530886, + "step": 410 + }, + { + "epoch": 0.4397905759162304, + "grad_norm": 45.76744842529297, + "kl": 0.0, + "learning_rate": 3.4493124069924635e-07, + "logits/chosen": -141672128.0, + "logits/rejected": -143752144.0, + "logps/chosen": -296.493825, + "logps/rejected": -276.59630248091605, + "loss": 1.489, + "rewards/chosen": -0.059238671875, + "rewards/margins": 2.682879686009065, + "rewards/rejected": -2.742118357884065, + "step": 420 + }, + { + "epoch": 0.450261780104712, + "grad_norm": 48.224552154541016, + "kl": 0.0, + "learning_rate": 3.3641130526488335e-07, + "logits/chosen": -128212800.0, + "logits/rejected": -138041008.0, + "logps/chosen": -262.0190345368917, + "logps/rejected": -302.6821539657854, + "loss": 1.5488, + "rewards/chosen": 0.1527433649898511, + "rewards/margins": 2.138388427805753, + "rewards/rejected": -1.985645062815902, + "step": 430 + }, + { + "epoch": 0.4607329842931937, + "grad_norm": 38.78076171875, + "kl": 0.0, + "learning_rate": 3.2777580236883473e-07, + "logits/chosen": -137593360.0, + "logits/rejected": -143156048.0, + "logps/chosen": -262.82073682108626, + "logps/rejected": -282.15691896024464, + "loss": 1.4863, + "rewards/chosen": 0.48399031276520066, + "rewards/margins": 2.222597083598534, + "rewards/rejected": -1.7386067708333333, + "step": 440 + }, + { + "epoch": 0.4712041884816754, + "grad_norm": 32.755828857421875, + "kl": 0.0, + "learning_rate": 3.1903628123081196e-07, + "logits/chosen": -145392688.0, + "logits/rejected": -133878232.0, + "logps/chosen": -279.9309734083851, + "logps/rejected": -278.34424135220127, + "loss": 1.4144, + "rewards/chosen": 0.4770013560419497, + "rewards/margins": 2.7351111625106013, + "rewards/rejected": -2.2581098064686516, + "step": 450 + }, + { + "epoch": 0.4816753926701571, + "grad_norm": 42.82301330566406, + "kl": 0.0, + "learning_rate": 3.1020443018570556e-07, + "logits/chosen": -127823832.0, + "logits/rejected": -148714048.0, + "logps/chosen": -277.72029728084414, + "logps/rejected": -274.04221573795184, + "loss": 1.4946, + "rewards/chosen": 0.2742920664997844, + "rewards/margins": 2.2432816111039826, + "rewards/rejected": -1.968989544604198, + "step": 460 + }, + { + "epoch": 0.49214659685863876, + "grad_norm": 35.25486755371094, + "kl": 0.0, + "learning_rate": 3.0129206105147343e-07, + "logits/chosen": -128224592.0, + "logits/rejected": -141323744.0, + "logps/chosen": -297.7064896003263, + "logps/rejected": -277.0822245127436, + "loss": 1.5014, + "rewards/chosen": -0.1534212246221197, + "rewards/margins": 2.4885884590126253, + "rewards/rejected": -2.642009683634745, + "step": 470 + }, + { + "epoch": 0.5026178010471204, + "grad_norm": 49.871315002441406, + "kl": 0.0, + "learning_rate": 2.923110933318805e-07, + "logits/chosen": -138666448.0, + "logits/rejected": -125876032.0, + "logps/chosen": -282.11912313432833, + "logps/rejected": 
-271.0550204918033, + "loss": 1.5235, + "rewards/chosen": -0.26497638759328357, + "rewards/margins": 2.5066780853831507, + "rewards/rejected": -2.7716544729764343, + "step": 480 + }, + { + "epoch": 0.5130890052356021, + "grad_norm": 39.03130340576172, + "kl": 0.0, + "learning_rate": 2.832735382752194e-07, + "logits/chosen": -144244752.0, + "logits/rejected": -139237664.0, + "logps/chosen": -280.307546898928, + "logps/rejected": -291.85860247208933, + "loss": 1.5082, + "rewards/chosen": -0.07937168746410796, + "rewards/margins": 2.5424585728902582, + "rewards/rejected": -2.621830260354366, + "step": 490 + }, + { + "epoch": 0.5235602094240838, + "grad_norm": 31.400175094604492, + "kl": 0.0, + "learning_rate": 2.741914828103307e-07, + "logits/chosen": -134795200.0, + "logits/rejected": -140993584.0, + "logps/chosen": -274.8821624803768, + "logps/rejected": -273.449115474339, + "loss": 1.4409, + "rewards/chosen": 0.3371262003900118, + "rewards/margins": 2.429343961677434, + "rewards/rejected": -2.092217761287422, + "step": 500 + }, + { + "epoch": 0.5340314136125655, + "grad_norm": 39.398651123046875, + "kl": 0.0, + "learning_rate": 2.650770733814065e-07, + "logits/chosen": -139524336.0, + "logits/rejected": -137561184.0, + "logps/chosen": -281.1636513157895, + "logps/rejected": -272.26749138591117, + "loss": 1.4701, + "rewards/chosen": 0.545446806547174, + "rewards/margins": 2.443065145583353, + "rewards/rejected": -1.8976183390361792, + "step": 510 + }, + { + "epoch": 0.5445026178010471, + "grad_norm": 40.88848114013672, + "kl": 0.0, + "learning_rate": 2.55942499703198e-07, + "logits/chosen": -147061424.0, + "logits/rejected": -143406240.0, + "logps/chosen": -285.4889, + "logps/rejected": -274.23685591603055, + "loss": 1.4519, + "rewards/chosen": 0.4090586181640625, + "rewards/margins": 2.4761641167327646, + "rewards/rejected": -2.067105498568702, + "step": 520 + }, + { + "epoch": 0.5549738219895288, + "grad_norm": 41.25908660888672, + "kl": 0.0, + "learning_rate": 2.467999784583527e-07, + "logits/chosen": -131054160.0, + "logits/rejected": -139775840.0, + "logps/chosen": -270.7588608226837, + "logps/rejected": -278.70869170489294, + "loss": 1.4667, + "rewards/chosen": 0.1376024556997866, + "rewards/margins": 2.5865234959926955, + "rewards/rejected": -2.448921040292909, + "step": 530 + }, + { + "epoch": 0.5654450261780105, + "grad_norm": 58.745155334472656, + "kl": 0.0, + "learning_rate": 2.3766173695868388e-07, + "logits/chosen": -139035088.0, + "logits/rejected": -133750928.0, + "logps/chosen": -290.9726024119449, + "logps/rejected": -290.5950209330144, + "loss": 1.5503, + "rewards/chosen": -0.12228842205146918, + "rewards/margins": 2.3166335900099346, + "rewards/rejected": -2.4389220120614037, + "step": 540 + }, + { + "epoch": 0.5759162303664922, + "grad_norm": 66.44160461425781, + "kl": 0.0, + "learning_rate": 2.285399967922253e-07, + "logits/chosen": -140837504.0, + "logits/rejected": -148332576.0, + "logps/chosen": -269.52528454472844, + "logps/rejected": -282.80800840978594, + "loss": 1.4314, + "rewards/chosen": -0.13683468998430637, + "rewards/margins": 2.8514840770383425, + "rewards/rejected": -2.988318767022649, + "step": 550 + }, + { + "epoch": 0.5863874345549738, + "grad_norm": 34.65999221801758, + "kl": 0.0, + "learning_rate": 2.194469574779397e-07, + "logits/chosen": -155893536.0, + "logits/rejected": -136393088.0, + "logps/chosen": -289.53893209408193, + "logps/rejected": -284.4916465378422, + "loss": 1.4864, + "rewards/chosen": -0.09103836383732751, + "rewards/margins": 
2.7043548873452408, + "rewards/rejected": -2.7953932511825683, + "step": 560 + }, + { + "epoch": 0.5968586387434555, + "grad_norm": 50.38192367553711, + "kl": 0.0, + "learning_rate": 2.1039478014994441e-07, + "logits/chosen": -140968768.0, + "logits/rejected": -132994816.0, + "logps/chosen": -269.5297433903577, + "logps/rejected": -288.17197311616957, + "loss": 1.4443, + "rewards/chosen": 0.11533069758912082, + "rewards/margins": 2.7949352204379236, + "rewards/rejected": -2.679604522848803, + "step": 570 + }, + { + "epoch": 0.6073298429319371, + "grad_norm": 39.10985565185547, + "kl": 0.0, + "learning_rate": 2.0139557129307149e-07, + "logits/chosen": -141384624.0, + "logits/rejected": -141585264.0, + "logps/chosen": -298.5900179140127, + "logps/rejected": -308.7128067484663, + "loss": 1.4304, + "rewards/chosen": -0.01027871393094397, + "rewards/margins": 2.8328863970257276, + "rewards/rejected": -2.843165110956672, + "step": 580 + }, + { + "epoch": 0.6178010471204188, + "grad_norm": 48.67289733886719, + "kl": 0.0, + "learning_rate": 1.9246136655151808e-07, + "logits/chosen": -145905728.0, + "logits/rejected": -138221376.0, + "logps/chosen": -293.7313226744186, + "logps/rejected": -306.55437992125985, + "loss": 1.4509, + "rewards/chosen": -0.13455553868020229, + "rewards/margins": 2.8649036994472583, + "rewards/rejected": -2.9994592381274607, + "step": 590 + }, + { + "epoch": 0.6282722513089005, + "grad_norm": 68.55772399902344, + "kl": 0.0, + "learning_rate": 1.8360411463223873e-07, + "logits/chosen": -136852608.0, + "logits/rejected": -143516144.0, + "logps/chosen": -284.4403070349762, + "logps/rejected": -294.88001632104454, + "loss": 1.4253, + "rewards/chosen": -0.10330413672806538, + "rewards/margins": 2.9733640825379677, + "rewards/rejected": -3.076668219266033, + "step": 600 + }, + { + "epoch": 0.6282722513089005, + "eval_kl": 0.0, + "eval_logits/chosen": -145117536.0, + "eval_logits/rejected": -143700400.0, + "eval_logps/chosen": -291.06696875, + "eval_logps/rejected": -298.3589375, + "eval_loss": 0.36837950348854065, + "eval_rewards/chosen": -0.32111080932617186, + "eval_rewards/margins": 2.819581085205078, + "eval_rewards/rejected": -3.14069189453125, + "eval_runtime": 92.5853, + "eval_samples_per_second": 43.203, + "eval_steps_per_second": 1.35, + "step": 600 + }, + { + "epoch": 0.6387434554973822, + "grad_norm": 46.867210388183594, + "kl": 0.0, + "learning_rate": 1.7483566132460865e-07, + "logits/chosen": -136255600.0, + "logits/rejected": -144252464.0, + "logps/chosen": -299.9070411392405, + "logps/rejected": -282.2795138888889, + "loss": 1.4918, + "rewards/chosen": -0.4866646392435967, + "rewards/margins": 2.6991346651063264, + "rewards/rejected": -3.185799304349923, + "step": 610 + }, + { + "epoch": 0.6492146596858639, + "grad_norm": 66.88858032226562, + "kl": 0.0, + "learning_rate": 1.66167733657731e-07, + "logits/chosen": -140277344.0, + "logits/rejected": -142182944.0, + "logps/chosen": -301.9623953349282, + "logps/rejected": -294.2529192189893, + "loss": 1.4825, + "rewards/chosen": -0.35411040140301037, + "rewards/margins": 2.692060965126469, + "rewards/rejected": -3.0461713665294794, + "step": 620 + }, + { + "epoch": 0.6596858638743456, + "grad_norm": 62.48853302001953, + "kl": 0.0, + "learning_rate": 1.5761192421657456e-07, + "logits/chosen": -133893392.0, + "logits/rejected": -143692496.0, + "logps/chosen": -292.95065395367413, + "logps/rejected": -299.167311735474, + "loss": 1.4055, + "rewards/chosen": 0.11951812159139127, + "rewards/margins": 2.9790377767782226, + 
"rewards/rejected": -2.859519655186831, + "step": 630 + }, + { + "epoch": 0.6701570680628273, + "grad_norm": 38.459293365478516, + "kl": 0.0, + "learning_rate": 1.491796756379185e-07, + "logits/chosen": -148631472.0, + "logits/rejected": -137124976.0, + "logps/chosen": -307.80620335820896, + "logps/rejected": -284.41946721311473, + "loss": 1.4798, + "rewards/chosen": 0.12505948650303172, + "rewards/margins": 2.7576216423047737, + "rewards/rejected": -2.632562155801742, + "step": 640 + }, + { + "epoch": 0.680628272251309, + "grad_norm": 51.62284469604492, + "kl": 0.0, + "learning_rate": 1.4088226530684071e-07, + "logits/chosen": -145016352.0, + "logits/rejected": -135913600.0, + "logps/chosen": -293.0742607526882, + "logps/rejected": -286.65267289348174, + "loss": 1.4106, + "rewards/chosen": 0.37428661059307794, + "rewards/margins": 2.8034505085213373, + "rewards/rejected": -2.4291638979282593, + "step": 650 + }, + { + "epoch": 0.6910994764397905, + "grad_norm": 44.217506408691406, + "kl": 0.0, + "learning_rate": 1.327307902742142e-07, + "logits/chosen": -153775056.0, + "logits/rejected": -142987488.0, + "logps/chosen": -278.56211538461537, + "logps/rejected": -295.2840277777778, + "loss": 1.4113, + "rewards/chosen": 0.3199942486102764, + "rewards/margins": 3.0220053085681133, + "rewards/rejected": -2.702011059957837, + "step": 660 + }, + { + "epoch": 0.7015706806282722, + "grad_norm": 52.56444549560547, + "kl": 0.0, + "learning_rate": 1.2473615241538523e-07, + "logits/chosen": -138428624.0, + "logits/rejected": -125599760.0, + "logps/chosen": -270.62024962742174, + "logps/rejected": -297.9065578817734, + "loss": 1.5102, + "rewards/chosen": 0.21654559389844916, + "rewards/margins": 2.3964325355420244, + "rewards/rejected": -2.1798869416435753, + "step": 670 + }, + { + "epoch": 0.7120418848167539, + "grad_norm": 38.48976516723633, + "kl": 0.0, + "learning_rate": 1.169090438498816e-07, + "logits/chosen": -140096608.0, + "logits/rejected": -141314656.0, + "logps/chosen": -289.16740023474176, + "logps/rejected": -289.05847796411854, + "loss": 1.4, + "rewards/chosen": 0.4871681464110182, + "rewards/margins": 2.8861619137789, + "rewards/rejected": -2.398993767367882, + "step": 680 + }, + { + "epoch": 0.7225130890052356, + "grad_norm": 56.998104095458984, + "kl": 0.0, + "learning_rate": 1.0925993264165045e-07, + "logits/chosen": -136509200.0, + "logits/rejected": -140580992.0, + "logps/chosen": -284.6041084265176, + "logps/rejected": -296.2347333715596, + "loss": 1.4483, + "rewards/chosen": 0.1843722529304675, + "rewards/margins": 2.723567089094936, + "rewards/rejected": -2.5391948361644685, + "step": 690 + }, + { + "epoch": 0.7329842931937173, + "grad_norm": 45.59560012817383, + "kl": 0.0, + "learning_rate": 1.0179904879894998e-07, + "logits/chosen": -139792672.0, + "logits/rejected": -133128280.0, + "logps/chosen": -282.0086206896552, + "logps/rejected": -297.9873685747664, + "loss": 1.4197, + "rewards/chosen": 0.06277323516558704, + "rewards/margins": 3.0981276117475574, + "rewards/rejected": -3.0353543765819704, + "step": 700 + }, + { + "epoch": 0.743455497382199, + "grad_norm": 48.215816497802734, + "kl": 0.0, + "learning_rate": 9.453637059262117e-08, + "logits/chosen": -127794064.0, + "logits/rejected": -130284464.0, + "logps/chosen": -276.90582061068704, + "logps/rejected": -275.3603, + "loss": 1.5249, + "rewards/chosen": -0.14867283478947996, + "rewards/margins": 2.47092736052302, + "rewards/rejected": -2.6196001953125, + "step": 710 + }, + { + "epoch": 0.7539267015706806, + "grad_norm": 
61.68962097167969, + "kl": 0.0, + "learning_rate": 8.748161121103406e-08, + "logits/chosen": -140951328.0, + "logits/rejected": -141405104.0, + "logps/chosen": -288.2717027559055, + "logps/rejected": -306.3112403100775, + "loss": 1.3695, + "rewards/chosen": 0.361008819820374, + "rewards/margins": 3.0993951070660133, + "rewards/rejected": -2.7383862872456395, + "step": 720 + }, + { + "epoch": 0.7643979057591623, + "grad_norm": 43.57563400268555, + "kl": 0.0, + "learning_rate": 8.064420576955965e-08, + "logits/chosen": -144350032.0, + "logits/rejected": -144956128.0, + "logps/chosen": -289.1480224609375, + "logps/rejected": -297.3230224609375, + "loss": 1.4858, + "rewards/chosen": 0.04629603624343872, + "rewards/margins": 2.6986050724983217, + "rewards/rejected": -2.652309036254883, + "step": 730 + }, + { + "epoch": 0.774869109947644, + "grad_norm": 47.147090911865234, + "kl": 0.0, + "learning_rate": 7.403329869193922e-08, + "logits/chosen": -135583312.0, + "logits/rejected": -131832256.0, + "logps/chosen": -277.6656105990783, + "logps/rejected": -275.17182531796504, + "loss": 1.3656, + "rewards/chosen": 0.1653434061967466, + "rewards/margins": 3.2219976161281854, + "rewards/rejected": -3.056654209931439, + "step": 740 + }, + { + "epoch": 0.7853403141361257, + "grad_norm": 44.07842254638672, + "kl": 0.0, + "learning_rate": 6.765773148042858e-08, + "logits/chosen": -143625632.0, + "logits/rejected": -132837352.0, + "logps/chosen": -285.6507056451613, + "logps/rejected": -281.92257054848966, + "loss": 1.4614, + "rewards/chosen": 0.21915514311665946, + "rewards/margins": 2.62252863091972, + "rewards/rejected": -2.4033734878030604, + "step": 750 + }, + { + "epoch": 0.7958115183246073, + "grad_norm": 37.42493438720703, + "kl": 0.0, + "learning_rate": 6.152603089107139e-08, + "logits/chosen": -136608224.0, + "logits/rejected": -131950376.0, + "logps/chosen": -276.34929128614914, + "logps/rejected": -277.7825792536116, + "loss": 1.4942, + "rewards/chosen": 0.24006759666779634, + "rewards/margins": 2.4807506731374995, + "rewards/rejected": -2.240683076469703, + "step": 760 + }, + { + "epoch": 0.806282722513089, + "grad_norm": 68.92852020263672, + "kl": 0.0, + "learning_rate": 5.5646397529920175e-08, + "logits/chosen": -132386256.0, + "logits/rejected": -138109456.0, + "logps/chosen": -303.72342011128774, + "logps/rejected": -284.04025057603684, + "loss": 1.3831, + "rewards/chosen": 0.32797061695772056, + "rewards/margins": 2.99956472398057, + "rewards/rejected": -2.6715941070228495, + "step": 770 + }, + { + "epoch": 0.8167539267015707, + "grad_norm": 36.25162124633789, + "kl": 0.0, + "learning_rate": 5.002669488545111e-08, + "logits/chosen": -126804304.0, + "logits/rejected": -149925328.0, + "logps/chosen": -280.9178725369458, + "logps/rejected": -298.3454033159463, + "loss": 1.4707, + "rewards/chosen": 0.24425755893851345, + "rewards/margins": 2.512562284278491, + "rewards/rejected": -2.2683047253399775, + "step": 780 + }, + { + "epoch": 0.8272251308900523, + "grad_norm": 45.3673095703125, + "kl": 0.0, + "learning_rate": 4.467443881184646e-08, + "logits/chosen": -137377824.0, + "logits/rejected": -140888416.0, + "logps/chosen": -284.73014937106916, + "logps/rejected": -271.3363742236025, + "loss": 1.4641, + "rewards/chosen": 0.15725456093842127, + "rewards/margins": 2.4592872211917207, + "rewards/rejected": -2.3020326602532997, + "step": 790 + }, + { + "epoch": 0.837696335078534, + "grad_norm": 44.92776870727539, + "kl": 0.0, + "learning_rate": 3.959678747720488e-08, + "logits/chosen": 
-147636928.0,
+      "logits/rejected": -129594688.0,
+      "logps/chosen": -278.11655092592594,
+      "logps/rejected": -284.1502840909091,
+      "loss": 1.4432,
+      "rewards/chosen": 0.22765783239293982,
+      "rewards/margins": 2.823709614417733,
+      "rewards/rejected": -2.5960517820247935,
+      "step": 800
+    },
+    {
+      "epoch": 0.837696335078534,
+      "eval_kl": 0.0,
+      "eval_logits/chosen": -140467840.0,
+      "eval_logits/rejected": -139209600.0,
+      "eval_logps/chosen": -286.2336875,
+      "eval_logps/rejected": -292.39625,
+      "eval_loss": 0.3657679557800293,
+      "eval_rewards/chosen": 0.16221832275390624,
+      "eval_rewards/margins": 2.7066421508789062,
+      "eval_rewards/rejected": -2.544423828125,
+      "eval_runtime": 92.5899,
+      "eval_samples_per_second": 43.201,
+      "eval_steps_per_second": 1.35,
+      "step": 800
+    },
+    {
+      "epoch": 0.8481675392670157,
+      "grad_norm": 64.69525909423828,
+      "kl": 0.0,
+      "learning_rate": 3.480053179012654e-08,
+      "logits/chosen": -129839872.0,
+      "logits/rejected": -140454848.0,
+      "logps/chosen": -266.87487579491255,
+      "logps/rejected": -288.485599078341,
+      "loss": 1.5392,
+      "rewards/chosen": -0.023006766702867273,
+      "rewards/margins": 2.3186599749686,
+      "rewards/rejected": -2.341666741671467,
+      "step": 810
+    },
+    {
+      "epoch": 0.8586387434554974,
+      "grad_norm": 63.29912185668945,
+      "kl": 0.0,
+      "learning_rate": 3.029208631747446e-08,
+      "logits/chosen": -138798032.0,
+      "logits/rejected": -130695232.0,
+      "logps/chosen": -273.63010448619633,
+      "logps/rejected": -289.54072949840764,
+      "loss": 1.3905,
+      "rewards/chosen": 0.2373036811688195,
+      "rewards/margins": 3.154649639319695,
+      "rewards/rejected": -2.9173459581508756,
+      "step": 820
+    },
+    {
+      "epoch": 0.8691099476439791,
+      "grad_norm": 35.773826599121094,
+      "kl": 0.0,
+      "learning_rate": 2.607748070546037e-08,
+      "logits/chosen": -138341072.0,
+      "logits/rejected": -140245856.0,
+      "logps/chosen": -276.33953568611986,
+      "logps/rejected": -295.26535893962847,
+      "loss": 1.4761,
+      "rewards/chosen": 0.13098442968133872,
+      "rewards/margins": 2.7483675067300037,
+      "rewards/rejected": -2.617383077048665,
+      "step": 830
+    },
+    {
+      "epoch": 0.8795811518324608,
+      "grad_norm": 36.50845718383789,
+      "kl": 0.0,
+      "learning_rate": 2.2162351615526544e-08,
+      "logits/chosen": -140731280.0,
+      "logits/rejected": -148560064.0,
+      "logps/chosen": -301.4704117063492,
+      "logps/rejected": -290.1577644230769,
+      "loss": 1.4465,
+      "rewards/chosen": 0.18666185651506698,
+      "rewards/margins": 2.7894090019477593,
+      "rewards/rejected": -2.6027471454326925,
+      "step": 840
+    },
+    {
+      "epoch": 0.8900523560209425,
+      "grad_norm": 55.24102020263672,
+      "kl": 0.0,
+      "learning_rate": 1.8551935185811717e-08,
+      "logits/chosen": -132794856.0,
+      "logits/rejected": -138046480.0,
+      "logps/chosen": -282.9409226190476,
+      "logps/rejected": -302.71471153846153,
+      "loss": 1.4063,
+      "rewards/chosen": 0.10835386003766741,
+      "rewards/margins": 3.033290759076129,
+      "rewards/rejected": -2.9249368990384617,
+      "step": 850
+    },
+    {
+      "epoch": 0.900523560209424,
+      "grad_norm": 70.16515350341797,
+      "kl": 0.0,
+      "learning_rate": 1.5251060028279612e-08,
+      "logits/chosen": -143098928.0,
+      "logits/rejected": -126897944.0,
+      "logps/chosen": -272.46225367078824,
+      "logps/rejected": -305.9686759478673,
+      "loss": 1.5084,
+      "rewards/chosen": 0.03112101370621317,
+      "rewards/margins": 2.5436736215754867,
+      "rewards/rejected": -2.5125526078692735,
+      "step": 860
+    },
+    {
+      "epoch": 0.9109947643979057,
+      "grad_norm": 28.95891761779785,
+      "kl": 0.0,
+      "learning_rate": 1.2264140770878839e-08,
+      "logits/chosen": -137280736.0,
+      "logits/rejected": -143405088.0,
+      "logps/chosen": -299.961469889065,
+      "logps/rejected": -295.115875385208,
+      "loss": 1.4681,
+      "rewards/chosen": -0.01648792260800224,
+      "rewards/margins": 2.59923304083335,
+      "rewards/rejected": -2.615720963441352,
+      "step": 870
+    },
+    {
+      "epoch": 0.9214659685863874,
+      "grad_norm": 37.45644760131836,
+      "kl": 0.0,
+      "learning_rate": 9.59517215336922e-09,
+      "logits/chosen": -128477976.0,
+      "logits/rejected": -129677584.0,
+      "logps/chosen": -280.0816461267606,
+      "logps/rejected": -291.42394695787834,
+      "loss": 1.4164,
+      "rewards/chosen": -0.018490225682982444,
+      "rewards/margins": 3.1044779987100557,
+      "rewards/rejected": -3.122968224393038,
+      "step": 880
+    },
+    {
+      "epoch": 0.9319371727748691,
+      "grad_norm": 43.50038528442383,
+      "kl": 0.0,
+      "learning_rate": 7.247723684711382e-09,
+      "logits/chosen": -137378768.0,
+      "logits/rejected": -126123072.0,
+      "logps/chosen": -271.5022151295732,
+      "logps/rejected": -294.2158453525641,
+      "loss": 1.4426,
+      "rewards/chosen": 0.19363278877444384,
+      "rewards/margins": 2.807892557529452,
+      "rewards/rejected": -2.614259768755008,
+      "step": 890
+    },
+    {
+      "epoch": 0.9424083769633508,
+      "grad_norm": 58.68267059326172,
+      "kl": 0.0,
+      "learning_rate": 5.224934869164976e-09,
+      "logits/chosen": -139006912.0,
+      "logits/rejected": -141954272.0,
+      "logps/chosen": -292.2946211507293,
+      "logps/rejected": -301.43368212669685,
+      "loss": 1.4844,
+      "rewards/chosen": -0.05539148785113515,
+      "rewards/margins": 2.60611269089368,
+      "rewards/rejected": -2.6615041787448153,
+      "step": 900
+    },
+    {
+      "epoch": 0.9528795811518325,
+      "grad_norm": 51.242652893066406,
+      "kl": 0.0,
+      "learning_rate": 3.529511007479946e-09,
+      "logits/chosen": -140018880.0,
+      "logits/rejected": -134809904.0,
+      "logps/chosen": -292.4076660906298,
+      "logps/rejected": -281.15220091414943,
+      "loss": 1.4564,
+      "rewards/chosen": 0.1075638450235815,
+      "rewards/margins": 2.6941378960967137,
+      "rewards/rejected": -2.586574051073132,
+      "step": 910
+    },
+    {
+      "epoch": 0.9633507853403142,
+      "grad_norm": 60.67096710205078,
+      "kl": 0.0,
+      "learning_rate": 2.1637195787966857e-09,
+      "logits/chosen": -132927744.0,
+      "logits/rejected": -145037952.0,
+      "logps/chosen": -294.478180176565,
+      "logps/rejected": -286.2923801369863,
+      "loss": 1.4298,
+      "rewards/chosen": 0.23743079906481993,
+      "rewards/margins": 2.895192835903671,
+      "rewards/rejected": -2.657762036838851,
+      "step": 920
+    },
+    {
+      "epoch": 0.9738219895287958,
+      "grad_norm": 44.87836837768555,
+      "kl": 0.0,
+      "learning_rate": 1.1293872080934963e-09,
+      "logits/chosen": -125942888.0,
+      "logits/rejected": -145652336.0,
+      "logps/chosen": -283.0784801136364,
+      "logps/rejected": -296.9692206325301,
+      "loss": 1.4259,
+      "rewards/chosen": 0.11567323858087714,
+      "rewards/margins": 2.9895860038999658,
+      "rewards/rejected": -2.873912765319089,
+      "step": 930
+    },
+    {
+      "epoch": 0.9842931937172775,
+      "grad_norm": 43.4354362487793,
+      "kl": 0.0,
+      "learning_rate": 4.2789722323760546e-10,
+      "logits/chosen": -142621872.0,
+      "logits/rejected": -136877328.0,
+      "logps/chosen": -288.74076066616766,
+      "logps/rejected": -292.02558210784315,
+      "loss": 1.4129,
+      "rewards/chosen": 0.27186810613392354,
+      "rewards/margins": 3.1179713504915623,
+      "rewards/rejected": -2.846103244357639,
+      "step": 940
+    },
+    {
+      "epoch": 0.9947643979057592,
+      "grad_norm": 49.643978118896484,
+      "kl": 0.0,
+      "learning_rate": 6.018780490690822e-11,
+      "logits/chosen": -147837520.0,
+      "logits/rejected": -131119704.0,
+      "logps/chosen": -285.26368371212124,
+      "logps/rejected": -282.18916330645163,
+      "loss": 1.4033,
+      "rewards/chosen": 0.14758417534105706,
+      "rewards/margins": 3.1335240379922666,
+      "rewards/rejected": -2.9859398626512097,
+      "step": 950
+    },
+    {
+      "epoch": 1.0,
+      "step": 955,
+      "total_flos": 0.0,
+      "train_loss": 1.5426912418834826,
+      "train_runtime": 5367.6535,
+      "train_samples_per_second": 22.779,
+      "train_steps_per_second": 0.178
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 955,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}