commit 77a5f1873410f8d9b096823926d04b70aef9b6e1 Author: ModelHub XC Date: Tue Apr 21 21:27:36 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: anssio/Llama-Poro-2-8B-Instruct Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 
index 0000000..9b25935 --- /dev/null +++ b/README.md @@ -0,0 +1,219 @@ +--- +datasets: +- LumiOpen/poro2-instruction-collection +- nvidia/HelpSteer3 +language: +- fi +- en +license: llama3.3 +library_name: transformers +pipeline_tag: text-generation +--- +# Poro 2 8B Instruct Model Card + +Poro 2 8B Instruct is an instruction-following chatbot model created through supervised fine-tuning (SFT) and Direct Preference Optimization (DPO) of the Poro 2 8B Base model. This model is designed for conversational AI applications and instruction following in both Finnish and English. It was trained on a carefully curated mix of English and Finnish instruction data, followed by preference tuning to improve response quality. + +Poro 2 was created in a collaboration between [AMD Silo AI](https://www.amd.com/en/solutions/ai/silo-ai.html), the [TurkuNLP group](https://turkunlp.org/) of the University of Turku, and [High Performance Language Technologies](https://hplt-project.org/) (HPLT). Training was conducted on the [LUMI supercomputer](https://www.lumi-supercomputer.eu/), using compute resources generously provided by [CSC](https://csc.fi/) - IT Center for Science, Finland. + +This model demonstrates how continued pretraining followed by instruction tuning can efficiently add new language capabilities to existing models while maintaining strong conversational abilities in both the original and target languages. + +For more details on our training and data generation pipeline, check out our [Continued Pretraining Playbook](https://rocm.blogs.amd.com/artificial-intelligence/multilingual-continued-pretraining/README.html). + +## Poro 2 Model Family + +The Poro 2 model family includes both 8B and 70B models, each released in three versions: a base model, a post-training SFT-only checkpoint, and the final instruct model, which is the SFT model plus a round of DPO. 
+ +| Model | Based on | Base Model | SFT | Instruct | +| :---: | :------: | :--------: | :-: | :------- | +| Poro 2 8B | Llama 3.1 8B | [Poro 2 8B Base](https://huggingface.co/LumiOpen/Llama-Poro-2-8B-base) | [Poro 2 8B SFT](https://huggingface.co/LumiOpen/Llama-Poro-2-8B-SFT) | [Poro 2 8B Instruct](https://huggingface.co/LumiOpen/Llama-Poro-2-8B-Instruct) | +| Poro 2 70B | Llama 3.1 70B | [Poro 2 70B Base](https://huggingface.co/LumiOpen/Llama-Poro-2-70B-base) | [Poro 2 70B SFT](https://huggingface.co/LumiOpen/Llama-Poro-2-70B-SFT) | [Poro 2 70B Instruct](https://huggingface.co/LumiOpen/Llama-Poro-2-70B-Instruct) | + +_What does Poro mean?_ Poro is the Finnish word for Reindeer! 🦌 These animals are native to Finland and hold a significant role in Finnish culture and history. + +## Model Overview + +Poro 2 8B Instruct is based on the Llama 3.1 8B architecture and has been fine-tuned for instruction following and conversational AI applications. The model supports both English and Finnish conversations. + +| Hyperparameter | Value | +| :------------- | :----: | +| n_parameters | 8.03B | +| n_layers | 32 | +| n_heads | 32 | +| n_kv_heads | 8 | +| d_model | 4096 | +| vocab_size | 128256 | +| max_sequence_length | 8192 | +| base_model | Llama-3.1-8B | + +## Training Process + +### Continued Pretraining +The base Poro 2 8B model was created through continued pretraining on 165B tokens of Finnish, English, code, and math data. + +### Supervised Fine-Tuning (SFT) +The SFT phase used 1.4M instruction-following examples in English and Finnish, including: +- English and Finnish Tulu 3 prompts with Llama-3.3-70B-Instruct responses +- Multi-turn conversations generated using the Magpie method +- Top-rated conversations from OASST2 and Avoin Avustaja datasets +- Translation samples from EuroParl + +We also release the [Poro 2 instruction collection](https://huggingface.co/datasets/LumiOpen/poro2-instruction-collection). 
+ +### Direct Preference Optimization (DPO) +The final model underwent preference tuning using the [HelpSteer3](https://huggingface.co/datasets/nvidia/HelpSteer3) dataset to improve response quality and alignment. + +## Post-Training Hyperparameters + +### SFT +| Hyperparameter | Value | +| :------------: | :---: | +| Epochs | 2 | +| Global batch size | 64 | +| Learning rate | 5e-6 | +| LR scheduler | linear | +| Warmup ratio | 0.03 | +| Max sequence length | 4,096 | + +### DPO +| Hyperparameter | Value | +| :------------: | :---: | +| Epochs | 3 | +| Global batch size | 64 | +| Beta | 0.01 | +| Learning rate | 5e-7 | +| LR scheduler | cosine | +| Warmup ratio | 0.1 | +| Max length | 4,096 | + +## Evaluation Results + +Poro 2 8B Instruct shows substantial improvements in Finnish instruction-following capabilities compared to Llama 3.1 8B Instruct, while maintaining strong English performance. We also outperform [Gemma-2-9B-it](https://huggingface.co/google/gemma-2-9b-it) and [EuroLLM-9B-Instruct](https://huggingface.co/utter-project/EuroLLM-9B-Instruct) in Finnish. 
+ +### Finnish Instruction Following +| | Poro 2 8B Instruct | Llama 3.1 8B Instruct | Gemma-2-9B-it | EuroLLM-9B-Instruct | +|-----------------|------------------|------------------------|------------------------|------------------------| +| IFEval Finnish | **66.54** | 47.31 | 55.82 | 44.17 | +| MTBench Finnish | **6.75** | 4.10 | 6.7 | 4.46 | +| AlpacaEval 2 Finnish | **28.89** | 2.05 | 21.85 | 8.15 | + + +### English Instruction Following +| | Poro 2 8B Instruct | Llama 3.1 8B Instruct | Gemma-2-9B-it | EuroLLM-9B-Instruct | +|-----------------|------------------|------------------------|------------------------|------------------------| +| IFEval | 79.29 | **79.48** | 72.45 | 61.36 | +| MTBench | 7.33 | 7.70 | **7.85** | 6.25 | +| AlpacaEval 2 | 35.30 | 32.70 | **46.67** | 15.87 | + + +### MTBench scores per-category + +| | Finnish | English | +|-----------------|---------|-----------| +| Coding | 4.95 | 5.8 | +| Extraction | 6.70 | 7.35 | +| Humanities | 8.55 | 8.85 | +| Math | 5.80 | 6.7 | +| Reasoning | 4.80 | 5.45 | +| Roleplay | 7.65 | 8.10 | +| STEM | 7.55 | 8.05 | +| Writing | 8.05 | 8.40 | + + +### MTBench scores per-turn + +| | Finnish | English | +|-----------------|---------|-----------| +| first turn | 7.23 | 7.75 | +| second turn | 6.27 | 6.92 | + + +### Pairwise Comparisons on MTBench +- **Finnish**: 85% win rate vs Llama 3.1 8B Instruct +- **Finnish**: 51% win rate vs Llama 3.3 70B Instruct +- **English**: 49% win rate vs Llama 3.1 8B Instruct + +**Overall**: ~24% average improvement in Finnish instruction-following benchmarks while maintaining English performance. 
+ +## Usage + +```python +from transformers import AutoTokenizer, AutoModelForCausalLM +import torch + +model_name = "LumiOpen/Llama-Poro-2-8B-Instruct" +tokenizer = AutoTokenizer.from_pretrained(model_name) +model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype=torch.bfloat16, + device_map="auto" +) + +# Finnish conversation example +messages = [ + {"role": "user", "content": "Kerro minulle Suomen historiasta."} +] + +inputs = tokenizer.apply_chat_template( + messages, + add_generation_prompt=True, + return_tensors="pt" +).to(model.device) + +outputs = model.generate( + inputs, + max_new_tokens=500, + temperature=0.7, + do_sample=True, + pad_token_id=tokenizer.eos_token_id +) + +response = tokenizer.decode(outputs[0], skip_special_tokens=True) +print(response) +``` + +## Intended Use + +Poro 2 8B Instruct is designed for: +- Conversational AI applications in Finnish and English +- Question answering and information retrieval +- Content generation and creative writing +- Educational applications +- Customer service and support applications +- Translation between Finnish and English + +## Ethical Considerations and Limitations + +Poro 2 8B Instruct is an advanced conversational AI model optimized for English and Finnish instruction following. As with most AI-driven systems, this model may reflect imperfections, biases, and idiosyncrasies present in its training data. 
+ +Key limitations: +- Limited proficiency in languages other than English and Finnish +- May occasionally generate biased, inappropriate, or factually incorrect content +- Performance may vary significantly for specialized or technical domains +- Context window limited to 8,192 tokens +- May struggle with very recent events (knowledge cutoff limitations) + +**Safety Considerations:** +- Users should verify important factual claims independently +- The model should not be used for medical, legal, or financial advice without human oversight +- Responses should be reviewed for appropriateness in sensitive contexts + +## License + +Built with Llama + +Poro 2 8B Instruct is released under the Llama 3.3 Community License. Please review the license terms before use. + +## Citation + +```bibtex +@misc{poro2_2025, + title={Poro 2: Continued Pretraining for Language Acquisition}, + author={Elaine Zosa and Jouni Louma and Kai Hakala and Antti Virtanen and Mika Koistinen and Risto Luukkonen and Akseli Reunamo and Sampo Pyysalo and Jonathan Burdge}, + year={2025}, + howpublished={LumiOpen} +} +``` + +## Acknowledgments + +We thank CSC - IT Center for Science, Finland for providing access to the LUMI supercomputer. This work was supported by the High Performance Language Technologies (HPLT) project and conducted in collaboration with TurkuNLP from the University of Turku. This project has received funding from the European Union's Horizon Europe research and innovation programme under grant agreement No 101070350. 
\ No newline at end of file diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..1d3e91c --- /dev/null +++ b/all_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 0.0, + "train_loss": 0.29230043576466097, + "train_runtime": 30177.164, + "train_samples": 30799, + "train_samples_per_second": 3.062, + "train_steps_per_second": 0.048 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..2c4bdc6 --- /dev/null +++ b/config.json @@ -0,0 +1,34 @@ +{ + "_name_or_path": "/scratch/project_462000353/zosaelai2/models/finnish-llama-3-8b-eng-fin-synthetic-rated-magpie-oasst2-avoin-avustaja-epochs-2-batch-64-packed", + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 500000.0, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.46.0", + "use_cache": true, + "vocab_size": 128256 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..2dd38cb --- /dev/null +++ b/generation_config.json @@ -0,0 +1,11 @@ +{ + "_from_model_config": true, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "transformers_version": "4.46.0", + "use_cache": false +} diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..45618c3 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 
+oid sha256:ed9768a31d16348c4cb16eb02b43fecbcca38be7e72bd2df47e4e9235e1d6b72 +size 4976698672 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..cf7d78e --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26a2785379fb9561ab98d81470ab058f2f5b337c25d077002b3519819d127c8e +size 4999802720 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..cb19b6b --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61a9cab26ace4d8b5157700037003e0d61d22af092722f752e8d5f6719419fc9 +size 4915916176 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..3d286c9 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:97ffedf0dc733e159ab15c321454ddbeea1f4ca4bae3303fdf4f11fadc61a36f +size 1168138808 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..0fd8120 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,298 @@ +{ + "metadata": { + "total_size": 16060522496 + }, + "weight_map": { + "lm_head.weight": "model-00004-of-00004.safetensors", + "model.embed_tokens.weight": "model-00001-of-00004.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.o_proj.weight": 
"model-00001-of-00004.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.down_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + 
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.k_proj.weight": 
"model-00002-of-00004.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.mlp.up_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.26.self_attn.q_proj.weight": 
"model-00003-of-00004.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + 
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", 
+ "model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", + "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors", 
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", + 
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", + "model.norm.weight": "model-00004-of-00004.safetensors" + } +} diff --git a/runs/May20_11-35-39_nid007965/events.out.tfevents.1747730366.nid007965.1560.0 b/runs/May20_11-35-39_nid007965/events.out.tfevents.1747730366.nid007965.1560.0 new file mode 100644 index 0000000..23b0c8f --- /dev/null +++ b/runs/May20_11-35-39_nid007965/events.out.tfevents.1747730366.nid007965.1560.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b130f85f90147f79aa28caa2cd4cee0675b04867d9816333213e5bc6c3a0858d +size 8679 diff --git a/runs/May20_11-50-31_nid005335/events.out.tfevents.1747731101.nid005335.86974.0 b/runs/May20_11-50-31_nid005335/events.out.tfevents.1747731101.nid005335.86974.0 new file mode 100644 index 0000000..feeab14 --- /dev/null +++ b/runs/May20_11-50-31_nid005335/events.out.tfevents.1747731101.nid005335.86974.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f407f4367a9ca0628ba3282e5483f37ea45e5652ceb5aa5e44a1eabc35bf1b80 +size 106609 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..344c826 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": 
"<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..1c1d8d5 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..a237e12 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2064 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + 
"content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": 
"<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + 
"128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + 
"special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + 
"normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": 
"<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": 
"<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": 
false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + 
"lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": 
"<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + 
}, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + 
"single_word": false, + "special": true + } + }, + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- set date_string = \"26 Jul 2024\" %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message + builtin tools #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if builtin_tools is defined or tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{%- if builtin_tools is defined %}\n {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n {%- for arg_name, arg_val in tool_call.arguments | items %}\n {{- arg_name + '=\"' + arg_val + '\"' }}\n {%- if not loop.last %}\n {{- \", \" }}\n {%- endif %}\n {%- endfor %}\n {{- \")\" }}\n {%- else %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {%- endif %}\n {%- if builtin_tools is defined %}\n {#- This means we're in ipython mode #}\n {{- \"<|eom_id|>\" }}\n {%- else %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + 
"extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 8192, + "pad_token": "<|eot_id|>", + "tokenizer_class": "PreTrainedTokenizerFast" +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..1d3e91c --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 3.0, + "total_flos": 0.0, + "train_loss": 0.29230043576466097, + "train_runtime": 30177.164, + "train_samples": 30799, + "train_samples_per_second": 3.062, + "train_steps_per_second": 0.048 +} \ No newline at end of file diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..61be305 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,2217 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "eval_steps": 500, + "global_step": 1446, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.002074688796680498, + "grad_norm": 8.200224942309267, + "learning_rate": 3.4482758620689654e-09, + "logits/chosen": -0.7109375, + "logits/rejected": -0.94140625, + "logps/chosen": -336.0, + "logps/rejected": -288.0, + "loss": 0.6914, + "rewards/accuracies": 0.0, + "rewards/chosen": 0.0, + "rewards/margins": 0.0, + "rewards/rejected": 0.0, + "step": 1 + }, + { + "epoch": 0.02074688796680498, + "grad_norm": 8.985547005814288, + "learning_rate": 3.448275862068965e-08, + "logits/chosen": -1.40625, + "logits/rejected": -1.46875, + "logps/chosen": -380.0, + "logps/rejected": -326.0, + "loss": 0.692, + "rewards/accuracies": 0.1111111119389534, + "rewards/chosen": -0.0011138916015625, + "rewards/margins": -0.00055694580078125, + "rewards/rejected": -0.00055694580078125, + "step": 10 + }, + { + "epoch": 0.04149377593360996, + "grad_norm": 7.537541071519504, + "learning_rate": 6.89655172413793e-08, + "logits/chosen": -1.4296875, + "logits/rejected": -1.4453125, + "logps/chosen": -328.0, + 
"logps/rejected": -366.0, + "loss": 0.6914, + "rewards/accuracies": 0.25, + "rewards/chosen": 0.000751495361328125, + "rewards/margins": -0.00225830078125, + "rewards/rejected": 0.0030059814453125, + "step": 20 + }, + { + "epoch": 0.06224066390041494, + "grad_norm": 8.46410953809254, + "learning_rate": 1.0344827586206897e-07, + "logits/chosen": -1.375, + "logits/rejected": -1.46875, + "logps/chosen": -420.0, + "logps/rejected": -374.0, + "loss": 0.692, + "rewards/accuracies": 0.20000000298023224, + "rewards/chosen": 0.00238037109375, + "rewards/margins": 0.001129150390625, + "rewards/rejected": 0.001251220703125, + "step": 30 + }, + { + "epoch": 0.08298755186721991, + "grad_norm": 8.157147362592257, + "learning_rate": 1.379310344827586e-07, + "logits/chosen": -1.453125, + "logits/rejected": -1.5078125, + "logps/chosen": -432.0, + "logps/rejected": -388.0, + "loss": 0.6911, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.00250244140625, + "rewards/margins": 0.0052490234375, + "rewards/rejected": -0.00274658203125, + "step": 40 + }, + { + "epoch": 0.1037344398340249, + "grad_norm": 8.08592590477226, + "learning_rate": 1.7241379310344828e-07, + "logits/chosen": -1.40625, + "logits/rejected": -1.4296875, + "logps/chosen": -340.0, + "logps/rejected": -300.0, + "loss": 0.6909, + "rewards/accuracies": 0.3499999940395355, + "rewards/chosen": 0.0054931640625, + "rewards/margins": 0.00150299072265625, + "rewards/rejected": 0.003997802734375, + "step": 50 + }, + { + "epoch": 0.12448132780082988, + "grad_norm": 8.524351634398997, + "learning_rate": 2.0689655172413793e-07, + "logits/chosen": -1.4375, + "logits/rejected": -1.4375, + "logps/chosen": -506.0, + "logps/rejected": -478.0, + "loss": 0.6896, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.02001953125, + "rewards/margins": 0.0030059814453125, + "rewards/rejected": 0.0169677734375, + "step": 60 + }, + { + "epoch": 0.14522821576763487, + "grad_norm": 7.5678704019111365, + 
"learning_rate": 2.413793103448276e-07, + "logits/chosen": -1.4296875, + "logits/rejected": -1.46875, + "logps/chosen": -386.0, + "logps/rejected": -262.0, + "loss": 0.6877, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.028076171875, + "rewards/margins": 0.020263671875, + "rewards/rejected": 0.00775146484375, + "step": 70 + }, + { + "epoch": 0.16597510373443983, + "grad_norm": 7.513277000409331, + "learning_rate": 2.758620689655172e-07, + "logits/chosen": -1.3515625, + "logits/rejected": -1.375, + "logps/chosen": -320.0, + "logps/rejected": -312.0, + "loss": 0.6847, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": 0.029052734375, + "rewards/margins": 0.001739501953125, + "rewards/rejected": 0.0272216796875, + "step": 80 + }, + { + "epoch": 0.18672199170124482, + "grad_norm": 7.275100713194908, + "learning_rate": 3.103448275862069e-07, + "logits/chosen": -1.40625, + "logits/rejected": -1.4140625, + "logps/chosen": -452.0, + "logps/rejected": -404.0, + "loss": 0.6811, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.062255859375, + "rewards/margins": 0.04150390625, + "rewards/rejected": 0.020751953125, + "step": 90 + }, + { + "epoch": 0.2074688796680498, + "grad_norm": 7.279491499537065, + "learning_rate": 3.4482758620689656e-07, + "logits/chosen": -1.46875, + "logits/rejected": -1.4921875, + "logps/chosen": -488.0, + "logps/rejected": -470.0, + "loss": 0.6749, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": 0.08154296875, + "rewards/margins": 0.04296875, + "rewards/rejected": 0.038818359375, + "step": 100 + }, + { + "epoch": 0.22821576763485477, + "grad_norm": 7.0878060086303085, + "learning_rate": 3.793103448275862e-07, + "logits/chosen": -1.4375, + "logits/rejected": -1.40625, + "logps/chosen": -346.0, + "logps/rejected": -350.0, + "loss": 0.6672, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.078125, + "rewards/margins": 0.041748046875, + "rewards/rejected": 0.03662109375, + "step": 110 + }, 
+ { + "epoch": 0.24896265560165975, + "grad_norm": 6.836599327469131, + "learning_rate": 4.1379310344827586e-07, + "logits/chosen": -1.453125, + "logits/rejected": -1.453125, + "logps/chosen": -436.0, + "logps/rejected": -328.0, + "loss": 0.6642, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.12109375, + "rewards/margins": 0.09521484375, + "rewards/rejected": 0.0260009765625, + "step": 120 + }, + { + "epoch": 0.2697095435684647, + "grad_norm": 7.277406568232138, + "learning_rate": 4.482758620689655e-07, + "logits/chosen": -1.5078125, + "logits/rejected": -1.421875, + "logps/chosen": -368.0, + "logps/rejected": -350.0, + "loss": 0.6436, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.0615234375, + "rewards/margins": 0.0908203125, + "rewards/rejected": -0.029052734375, + "step": 130 + }, + { + "epoch": 0.29045643153526973, + "grad_norm": 7.649612249719239, + "learning_rate": 4.827586206896552e-07, + "logits/chosen": -1.3984375, + "logits/rejected": -1.390625, + "logps/chosen": -362.0, + "logps/rejected": -322.0, + "loss": 0.6473, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": 0.046875, + "rewards/margins": 0.150390625, + "rewards/rejected": -0.103515625, + "step": 140 + }, + { + "epoch": 0.3112033195020747, + "grad_norm": 7.208353024534284, + "learning_rate": 4.99981778257793e-07, + "logits/chosen": -1.453125, + "logits/rejected": -1.4375, + "logps/chosen": -436.0, + "logps/rejected": -432.0, + "loss": 0.6378, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.05517578125, + "rewards/margins": 0.0341796875, + "rewards/rejected": -0.0888671875, + "step": 150 + }, + { + "epoch": 0.33195020746887965, + "grad_norm": 8.231246609192565, + "learning_rate": 4.998360202572815e-07, + "logits/chosen": -1.3359375, + "logits/rejected": -1.3984375, + "logps/chosen": -372.0, + "logps/rejected": -376.0, + "loss": 0.6266, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.003570556640625, + 
"rewards/margins": 0.1337890625, + "rewards/rejected": -0.1298828125, + "step": 160 + }, + { + "epoch": 0.35269709543568467, + "grad_norm": 7.711663165069283, + "learning_rate": 4.995445892440316e-07, + "logits/chosen": -1.40625, + "logits/rejected": -1.2578125, + "logps/chosen": -388.0, + "logps/rejected": -434.0, + "loss": 0.631, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.1064453125, + "rewards/margins": 0.083984375, + "rewards/rejected": -0.1904296875, + "step": 170 + }, + { + "epoch": 0.37344398340248963, + "grad_norm": 8.152087874731409, + "learning_rate": 4.991076551440359e-07, + "logits/chosen": -1.2890625, + "logits/rejected": -1.2421875, + "logps/chosen": -450.0, + "logps/rejected": -448.0, + "loss": 0.6192, + "rewards/accuracies": 0.5, + "rewards/chosen": 0.021484375, + "rewards/margins": 0.1630859375, + "rewards/rejected": -0.1416015625, + "step": 180 + }, + { + "epoch": 0.3941908713692946, + "grad_norm": 8.294099174165812, + "learning_rate": 4.985254727224266e-07, + "logits/chosen": -1.2578125, + "logits/rejected": -1.21875, + "logps/chosen": -436.0, + "logps/rejected": -402.0, + "loss": 0.6019, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.146484375, + "rewards/margins": 0.232421875, + "rewards/rejected": -0.08642578125, + "step": 190 + }, + { + "epoch": 0.4149377593360996, + "grad_norm": 8.447421779277066, + "learning_rate": 4.977983814349285e-07, + "logits/chosen": -1.3125, + "logits/rejected": -1.359375, + "logps/chosen": -468.0, + "logps/rejected": -396.0, + "loss": 0.608, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.220703125, + "rewards/margins": 0.0311279296875, + "rewards/rejected": -0.251953125, + "step": 200 + }, + { + "epoch": 0.43568464730290457, + "grad_norm": 9.723114067333197, + "learning_rate": 4.969268052299307e-07, + "logits/chosen": -1.1796875, + "logits/rejected": -1.234375, + "logps/chosen": -356.0, + "logps/rejected": -366.0, + "loss": 0.5878, + "rewards/accuracies": 
0.699999988079071, + "rewards/chosen": -0.1279296875, + "rewards/margins": 0.29296875, + "rewards/rejected": -0.421875, + "step": 210 + }, + { + "epoch": 0.45643153526970953, + "grad_norm": 10.716738710931661, + "learning_rate": 4.959112523012938e-07, + "logits/chosen": -1.328125, + "logits/rejected": -1.359375, + "logps/chosen": -480.0, + "logps/rejected": -460.0, + "loss": 0.5766, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.328125, + "rewards/margins": 0.3671875, + "rewards/rejected": -0.6953125, + "step": 220 + }, + { + "epoch": 0.47717842323651455, + "grad_norm": 9.819110125640536, + "learning_rate": 4.947523147920345e-07, + "logits/chosen": -1.3203125, + "logits/rejected": -1.234375, + "logps/chosen": -548.0, + "logps/rejected": -450.0, + "loss": 0.5689, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.68359375, + "rewards/margins": 0.330078125, + "rewards/rejected": -1.015625, + "step": 230 + }, + { + "epoch": 0.4979253112033195, + "grad_norm": 11.406864477616395, + "learning_rate": 4.934506684490621e-07, + "logits/chosen": -1.2109375, + "logits/rejected": -1.1953125, + "logps/chosen": -448.0, + "logps/rejected": -442.0, + "loss": 0.5737, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.6796875, + "rewards/margins": 0.375, + "rewards/rejected": -1.0546875, + "step": 240 + }, + { + "epoch": 0.5186721991701245, + "grad_norm": 12.451204475119791, + "learning_rate": 4.920070722291682e-07, + "logits/chosen": -1.3515625, + "logits/rejected": -1.4140625, + "logps/chosen": -520.0, + "logps/rejected": -552.0, + "loss": 0.5527, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.8515625, + "rewards/margins": 0.7890625, + "rewards/rejected": -1.640625, + "step": 250 + }, + { + "epoch": 0.5394190871369294, + "grad_norm": 14.291210178551019, + "learning_rate": 4.904223678564975e-07, + "logits/chosen": -1.171875, + "logits/rejected": -1.1015625, + "logps/chosen": -498.0, + "logps/rejected": -450.0, + "loss": 0.5554, + 
"rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.953125, + "rewards/margins": 0.32421875, + "rewards/rejected": -1.2734375, + "step": 260 + }, + { + "epoch": 0.5601659751037344, + "grad_norm": 13.863578096384135, + "learning_rate": 4.886974793317607e-07, + "logits/chosen": -1.171875, + "logits/rejected": -1.28125, + "logps/chosen": -516.0, + "logps/rejected": -532.0, + "loss": 0.5048, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -0.8203125, + "rewards/margins": 0.875, + "rewards/rejected": -1.6875, + "step": 270 + }, + { + "epoch": 0.5809128630705395, + "grad_norm": 12.53026425282876, + "learning_rate": 4.86833412393473e-07, + "logits/chosen": -1.0703125, + "logits/rejected": -1.1328125, + "logps/chosen": -432.0, + "logps/rejected": -452.0, + "loss": 0.5557, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.68359375, + "rewards/margins": 0.80078125, + "rewards/rejected": -1.4921875, + "step": 280 + }, + { + "epoch": 0.6016597510373444, + "grad_norm": 14.022892671657644, + "learning_rate": 4.848312539315334e-07, + "logits/chosen": -1.375, + "logits/rejected": -1.359375, + "logps/chosen": -488.0, + "logps/rejected": -454.0, + "loss": 0.5069, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.7890625, + "rewards/margins": 0.55859375, + "rewards/rejected": -1.34375, + "step": 290 + }, + { + "epoch": 0.6224066390041494, + "grad_norm": 16.89545464924121, + "learning_rate": 4.826921713534873e-07, + "logits/chosen": -1.2265625, + "logits/rejected": -1.28125, + "logps/chosen": -520.0, + "logps/rejected": -548.0, + "loss": 0.5104, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.67578125, + "rewards/margins": 0.74609375, + "rewards/rejected": -1.421875, + "step": 300 + }, + { + "epoch": 0.6431535269709544, + "grad_norm": 14.047474903550272, + "learning_rate": 4.804174119038404e-07, + "logits/chosen": -1.171875, + "logits/rejected": -1.15625, + "logps/chosen": -472.0, + "logps/rejected": -498.0, + "loss": 
0.5325, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.640625, + "rewards/margins": 0.69140625, + "rewards/rejected": -1.328125, + "step": 310 + }, + { + "epoch": 0.6639004149377593, + "grad_norm": 11.967189628642249, + "learning_rate": 4.78008301936823e-07, + "logits/chosen": -1.2265625, + "logits/rejected": -1.1875, + "logps/chosen": -458.0, + "logps/rejected": -504.0, + "loss": 0.514, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.59375, + "rewards/margins": 0.734375, + "rewards/rejected": -1.328125, + "step": 320 + }, + { + "epoch": 0.6846473029045643, + "grad_norm": 13.820746186375771, + "learning_rate": 4.754662461430258e-07, + "logits/chosen": -1.3203125, + "logits/rejected": -1.328125, + "logps/chosen": -612.0, + "logps/rejected": -556.0, + "loss": 0.5133, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.15625, + "rewards/margins": 0.69140625, + "rewards/rejected": -1.84375, + "step": 330 + }, + { + "epoch": 0.7053941908713693, + "grad_norm": 15.47060039648899, + "learning_rate": 4.727927267303612e-07, + "logits/chosen": -1.171875, + "logits/rejected": -1.09375, + "logps/chosen": -452.0, + "logps/rejected": -454.0, + "loss": 0.5407, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -1.265625, + "rewards/margins": 0.5234375, + "rewards/rejected": -1.7890625, + "step": 340 + }, + { + "epoch": 0.7261410788381742, + "grad_norm": 11.835480559229415, + "learning_rate": 4.699893025598255e-07, + "logits/chosen": -1.1875, + "logits/rejected": -1.1953125, + "logps/chosen": -484.0, + "logps/rejected": -490.0, + "loss": 0.5124, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.1796875, + "rewards/margins": 0.384765625, + "rewards/rejected": -1.5703125, + "step": 350 + }, + { + "epoch": 0.7468879668049793, + "grad_norm": 17.32570297851737, + "learning_rate": 4.67057608236567e-07, + "logits/chosen": -1.0703125, + "logits/rejected": -1.0, + "logps/chosen": -436.0, + "logps/rejected": -468.0, + "loss": 
0.4606, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.234375, + "rewards/margins": 0.796875, + "rewards/rejected": -2.03125, + "step": 360 + }, + { + "epoch": 0.7676348547717843, + "grad_norm": 14.712913827549949, + "learning_rate": 4.6399935315678893e-07, + "logits/chosen": -1.0703125, + "logits/rejected": -1.0859375, + "logps/chosen": -552.0, + "logps/rejected": -498.0, + "loss": 0.4847, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.78125, + "rewards/margins": 0.6328125, + "rewards/rejected": -2.40625, + "step": 370 + }, + { + "epoch": 0.7883817427385892, + "grad_norm": 15.253980446488892, + "learning_rate": 4.608163205110447e-07, + "logits/chosen": -1.3125, + "logits/rejected": -1.359375, + "logps/chosen": -544.0, + "logps/rejected": -572.0, + "loss": 0.4847, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.890625, + "rewards/margins": 0.765625, + "rewards/rejected": -1.65625, + "step": 380 + }, + { + "epoch": 0.8091286307053942, + "grad_norm": 14.996759932569487, + "learning_rate": 4.5751036624450445e-07, + "logits/chosen": -1.4140625, + "logits/rejected": -1.34375, + "logps/chosen": -552.0, + "logps/rejected": -540.0, + "loss": 0.4827, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.046875, + "rewards/margins": 0.375, + "rewards/rejected": -1.421875, + "step": 390 + }, + { + "epoch": 0.8298755186721992, + "grad_norm": 16.766050665345595, + "learning_rate": 4.540834179748012e-07, + "logits/chosen": -1.171875, + "logits/rejected": -1.25, + "logps/chosen": -420.0, + "logps/rejected": -528.0, + "loss": 0.4556, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.21875, + "rewards/margins": 0.60546875, + "rewards/rejected": -1.8203125, + "step": 400 + }, + { + "epoch": 0.8506224066390041, + "grad_norm": 19.558534780127147, + "learning_rate": 4.5053747386808564e-07, + "logits/chosen": -1.1953125, + "logits/rejected": -1.3203125, + "logps/chosen": -508.0, + "logps/rejected": -688.0, + 
"loss": 0.4707, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": -1.3671875, + "rewards/margins": 0.9296875, + "rewards/rejected": -2.296875, + "step": 410 + }, + { + "epoch": 0.8713692946058091, + "grad_norm": 14.183474677606634, + "learning_rate": 4.4687460147394706e-07, + "logits/chosen": -1.3125, + "logits/rejected": -1.3515625, + "logps/chosen": -532.0, + "logps/rejected": -588.0, + "loss": 0.4869, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.1796875, + "rewards/margins": 0.73046875, + "rewards/rejected": -1.9140625, + "step": 420 + }, + { + "epoch": 0.8921161825726142, + "grad_norm": 13.143331743152638, + "learning_rate": 4.4309693651987726e-07, + "logits/chosen": -1.328125, + "logits/rejected": -1.28125, + "logps/chosen": -624.0, + "logps/rejected": -600.0, + "loss": 0.4787, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.1484375, + "rewards/margins": 1.0390625, + "rewards/rejected": -2.1875, + "step": 430 + }, + { + "epoch": 0.9128630705394191, + "grad_norm": 16.10745504932835, + "learning_rate": 4.3920668166598273e-07, + "logits/chosen": -1.3671875, + "logits/rejected": -1.40625, + "logps/chosen": -476.0, + "logps/rejected": -520.0, + "loss": 0.423, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -1.0703125, + "rewards/margins": 0.88671875, + "rewards/rejected": -1.9609375, + "step": 440 + }, + { + "epoch": 0.9336099585062241, + "grad_norm": 17.6229119736944, + "learning_rate": 4.352061052206695e-07, + "logits/chosen": -1.28125, + "logits/rejected": -1.2578125, + "logps/chosen": -580.0, + "logps/rejected": -644.0, + "loss": 0.4367, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.140625, + "rewards/margins": 0.88671875, + "rewards/rejected": -3.03125, + "step": 450 + }, + { + "epoch": 0.9543568464730291, + "grad_norm": 27.688196316137688, + "learning_rate": 4.3109753981805045e-07, + "logits/chosen": -1.3515625, + "logits/rejected": -1.3828125, + "logps/chosen": -696.0, + 
"logps/rejected": -692.0, + "loss": 0.4332, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -1.890625, + "rewards/margins": 1.3125, + "rewards/rejected": -3.1875, + "step": 460 + }, + { + "epoch": 0.975103734439834, + "grad_norm": 19.37927464187824, + "learning_rate": 4.2688338105784584e-07, + "logits/chosen": -1.203125, + "logits/rejected": -1.2890625, + "logps/chosen": -556.0, + "logps/rejected": -568.0, + "loss": 0.4428, + "rewards/accuracies": 0.75, + "rewards/chosen": -1.2890625, + "rewards/margins": 1.09375, + "rewards/rejected": -2.390625, + "step": 470 + }, + { + "epoch": 0.995850622406639, + "grad_norm": 16.81355148557565, + "learning_rate": 4.2256608610857014e-07, + "logits/chosen": -1.3984375, + "logits/rejected": -1.390625, + "logps/chosen": -604.0, + "logps/rejected": -576.0, + "loss": 0.4534, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.3828125, + "rewards/margins": 1.0390625, + "rewards/rejected": -2.421875, + "step": 480 + }, + { + "epoch": 1.016597510373444, + "grad_norm": 22.885104807982884, + "learning_rate": 4.181481722748197e-07, + "logits/chosen": -1.3671875, + "logits/rejected": -1.328125, + "logps/chosen": -446.0, + "logps/rejected": -548.0, + "loss": 0.3546, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -1.4375, + "rewards/margins": 1.0546875, + "rewards/rejected": -2.5, + "step": 490 + }, + { + "epoch": 1.037344398340249, + "grad_norm": 28.388093085355763, + "learning_rate": 4.136322155294968e-07, + "logits/chosen": -1.1875, + "logits/rejected": -1.3046875, + "logps/chosen": -656.0, + "logps/rejected": -740.0, + "loss": 0.3066, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.25, + "rewards/margins": 1.2890625, + "rewards/rejected": -3.53125, + "step": 500 + }, + { + "epoch": 1.058091286307054, + "grad_norm": 18.000547586432397, + "learning_rate": 4.090208490118253e-07, + "logits/chosen": -1.21875, + "logits/rejected": -1.3671875, + "logps/chosen": -792.0, + 
"logps/rejected": -820.0, + "loss": 0.3002, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.234375, + "rewards/margins": 1.640625, + "rewards/rejected": -3.859375, + "step": 510 + }, + { + "epoch": 1.0788381742738589, + "grad_norm": 26.116853952186087, + "learning_rate": 4.0431676149203457e-07, + "logits/chosen": -1.25, + "logits/rejected": -1.2421875, + "logps/chosen": -544.0, + "logps/rejected": -660.0, + "loss": 0.305, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.8828125, + "rewards/margins": 1.9453125, + "rewards/rejected": -3.828125, + "step": 520 + }, + { + "epoch": 1.099585062240664, + "grad_norm": 20.665661382654836, + "learning_rate": 3.995226958036058e-07, + "logits/chosen": -1.0625, + "logits/rejected": -1.125, + "logps/chosen": -628.0, + "logps/rejected": -728.0, + "loss": 0.282, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.53125, + "rewards/margins": 1.625, + "rewards/rejected": -4.15625, + "step": 530 + }, + { + "epoch": 1.120331950207469, + "grad_norm": 20.874625813560073, + "learning_rate": 3.9464144724399605e-07, + "logits/chosen": -1.1328125, + "logits/rejected": -1.109375, + "logps/chosen": -656.0, + "logps/rejected": -780.0, + "loss": 0.2842, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -1.9140625, + "rewards/margins": 2.1875, + "rewards/rejected": -4.09375, + "step": 540 + }, + { + "epoch": 1.1410788381742738, + "grad_norm": 15.838820640241945, + "learning_rate": 3.896758619447714e-07, + "logits/chosen": -1.1796875, + "logits/rejected": -1.21875, + "logps/chosen": -608.0, + "logps/rejected": -804.0, + "loss": 0.2805, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.484375, + "rewards/margins": 2.078125, + "rewards/rejected": -4.5625, + "step": 550 + }, + { + "epoch": 1.161825726141079, + "grad_norm": 16.332425845632645, + "learning_rate": 3.846288352121003e-07, + "logits/chosen": -1.3203125, + "logits/rejected": -1.265625, + "logps/chosen": -696.0, 
+ "logps/rejected": -712.0, + "loss": 0.3048, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.765625, + "rewards/margins": 1.0234375, + "rewards/rejected": -3.78125, + "step": 560 + }, + { + "epoch": 1.1825726141078838, + "grad_norm": 17.929954538214847, + "learning_rate": 3.795033098385744e-07, + "logits/chosen": -1.28125, + "logits/rejected": -1.3125, + "logps/chosen": -620.0, + "logps/rejected": -832.0, + "loss": 0.2894, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.609375, + "rewards/margins": 1.71875, + "rewards/rejected": -4.34375, + "step": 570 + }, + { + "epoch": 1.2033195020746887, + "grad_norm": 32.79066665207477, + "learning_rate": 3.7430227438734086e-07, + "logits/chosen": -1.2265625, + "logits/rejected": -1.1875, + "logps/chosen": -656.0, + "logps/rejected": -764.0, + "loss": 0.2766, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.640625, + "rewards/margins": 1.609375, + "rewards/rejected": -4.25, + "step": 580 + }, + { + "epoch": 1.2240663900414939, + "grad_norm": 18.82509345910388, + "learning_rate": 3.690287614495481e-07, + "logits/chosen": -1.328125, + "logits/rejected": -1.3203125, + "logps/chosen": -740.0, + "logps/rejected": -920.0, + "loss": 0.2188, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.75, + "rewards/margins": 2.296875, + "rewards/rejected": -5.0625, + "step": 590 + }, + { + "epoch": 1.2448132780082988, + "grad_norm": 22.18730206624369, + "learning_rate": 3.6368584587611854e-07, + "logits/chosen": -1.2734375, + "logits/rejected": -1.3203125, + "logps/chosen": -692.0, + "logps/rejected": -920.0, + "loss": 0.2944, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.125, + "rewards/margins": 2.28125, + "rewards/rejected": -5.40625, + "step": 600 + }, + { + "epoch": 1.2655601659751037, + "grad_norm": 16.250841109003115, + "learning_rate": 3.582766429848818e-07, + "logits/chosen": -1.34375, + "logits/rejected": -1.375, + "logps/chosen": -652.0, + 
"logps/rejected": -756.0, + "loss": 0.2651, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.59375, + "rewards/margins": 1.75, + "rewards/rejected": -4.34375, + "step": 610 + }, + { + "epoch": 1.2863070539419086, + "grad_norm": 32.2790592968402, + "learning_rate": 3.528043067441123e-07, + "logits/chosen": -1.2265625, + "logits/rejected": -1.28125, + "logps/chosen": -516.0, + "logps/rejected": -724.0, + "loss": 0.2655, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -2.5, + "rewards/margins": 1.7734375, + "rewards/rejected": -4.28125, + "step": 620 + }, + { + "epoch": 1.3070539419087137, + "grad_norm": 21.008974688174074, + "learning_rate": 3.472720279335305e-07, + "logits/chosen": -1.3515625, + "logits/rejected": -1.3125, + "logps/chosen": -716.0, + "logps/rejected": -856.0, + "loss": 0.2614, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -3.0, + "rewards/margins": 1.8828125, + "rewards/rejected": -4.875, + "step": 630 + }, + { + "epoch": 1.3278008298755186, + "grad_norm": 24.817331119096536, + "learning_rate": 3.4168303228384097e-07, + "logits/chosen": -1.2578125, + "logits/rejected": -1.34375, + "logps/chosen": -744.0, + "logps/rejected": -880.0, + "loss": 0.2602, + "rewards/accuracies": 0.75, + "rewards/chosen": -2.984375, + "rewards/margins": 1.375, + "rewards/rejected": -4.375, + "step": 640 + }, + { + "epoch": 1.3485477178423237, + "grad_norm": 16.172899710422996, + "learning_rate": 3.36040578595891e-07, + "logits/chosen": -1.2109375, + "logits/rejected": -1.234375, + "logps/chosen": -704.0, + "logps/rejected": -928.0, + "loss": 0.2566, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.8125, + "rewards/margins": 2.09375, + "rewards/rejected": -4.90625, + "step": 650 + }, + { + "epoch": 1.3692946058091287, + "grad_norm": 28.674908041856455, + "learning_rate": 3.303479568405467e-07, + "logits/chosen": -1.2578125, + "logits/rejected": -1.34375, + "logps/chosen": -692.0, + "logps/rejected": 
-712.0, + "loss": 0.248, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -2.59375, + "rewards/margins": 1.40625, + "rewards/rejected": -4.0, + "step": 660 + }, + { + "epoch": 1.3900414937759336, + "grad_norm": 18.929031877783473, + "learning_rate": 3.246084862403949e-07, + "logits/chosen": -1.0546875, + "logits/rejected": -1.1875, + "logps/chosen": -724.0, + "logps/rejected": -856.0, + "loss": 0.2285, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.609375, + "rewards/margins": 2.390625, + "rewards/rejected": -5.0, + "step": 670 + }, + { + "epoch": 1.4107883817427385, + "grad_norm": 27.863022449197903, + "learning_rate": 3.188255133343896e-07, + "logits/chosen": -1.3359375, + "logits/rejected": -1.390625, + "logps/chosen": -744.0, + "logps/rejected": -1012.0, + "loss": 0.2616, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.328125, + "rewards/margins": 2.359375, + "rewards/rejected": -5.6875, + "step": 680 + }, + { + "epoch": 1.4315352697095436, + "grad_norm": 20.6381381654102, + "learning_rate": 3.1300241002656964e-07, + "logits/chosen": -1.2578125, + "logits/rejected": -1.3125, + "logps/chosen": -744.0, + "logps/rejected": -932.0, + "loss": 0.2343, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.046875, + "rewards/margins": 2.515625, + "rewards/rejected": -5.5625, + "step": 690 + }, + { + "epoch": 1.4522821576763485, + "grad_norm": 21.416162109964308, + "learning_rate": 3.071425716199882e-07, + "logits/chosen": -1.265625, + "logits/rejected": -1.3203125, + "logps/chosen": -708.0, + "logps/rejected": -940.0, + "loss": 0.2137, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.765625, + "rewards/margins": 2.734375, + "rewards/rejected": -5.5, + "step": 700 + }, + { + "epoch": 1.4730290456431536, + "grad_norm": 27.537624866222753, + "learning_rate": 3.0124941483699753e-07, + "logits/chosen": -1.34375, + "logits/rejected": -1.34375, + "logps/chosen": -892.0, + "logps/rejected": -1064.0, + "loss": 
0.2099, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -4.34375, + "rewards/margins": 1.875, + "rewards/rejected": -6.21875, + "step": 710 + }, + { + "epoch": 1.4937759336099585, + "grad_norm": 17.582919177615466, + "learning_rate": 2.953263758270459e-07, + "logits/chosen": -1.21875, + "logits/rejected": -1.328125, + "logps/chosen": -612.0, + "logps/rejected": -720.0, + "loss": 0.236, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.390625, + "rewards/margins": 1.9453125, + "rewards/rejected": -4.34375, + "step": 720 + }, + { + "epoch": 1.5145228215767634, + "grad_norm": 33.007588469499844, + "learning_rate": 2.8937690816314577e-07, + "logits/chosen": -1.328125, + "logits/rejected": -1.390625, + "logps/chosen": -744.0, + "logps/rejected": -980.0, + "loss": 0.2203, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.515625, + "rewards/margins": 2.5625, + "rewards/rejected": -6.0625, + "step": 730 + }, + { + "epoch": 1.5352697095435683, + "grad_norm": 24.645168136425823, + "learning_rate": 2.834044808281841e-07, + "logits/chosen": -1.3046875, + "logits/rejected": -1.4375, + "logps/chosen": -664.0, + "logps/rejected": -868.0, + "loss": 0.229, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.6875, + "rewards/margins": 2.75, + "rewards/rejected": -5.4375, + "step": 740 + }, + { + "epoch": 1.5560165975103735, + "grad_norm": 19.904894364928772, + "learning_rate": 2.774125761922463e-07, + "logits/chosen": -1.1953125, + "logits/rejected": -1.21875, + "logps/chosen": -580.0, + "logps/rejected": -884.0, + "loss": 0.2046, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.453125, + "rewards/margins": 2.90625, + "rewards/rejected": -5.34375, + "step": 750 + }, + { + "epoch": 1.5767634854771784, + "grad_norm": 30.15619672097981, + "learning_rate": 2.714046879821358e-07, + "logits/chosen": -1.1875, + "logits/rejected": -1.328125, + "logps/chosen": -716.0, + "logps/rejected": -992.0, + "loss": 0.2351, + 
"rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.484375, + "rewards/margins": 2.9375, + "rewards/rejected": -6.40625, + "step": 760 + }, + { + "epoch": 1.5975103734439835, + "grad_norm": 27.783614402532425, + "learning_rate": 2.653843192442699e-07, + "logits/chosen": -1.2734375, + "logits/rejected": -1.234375, + "logps/chosen": -724.0, + "logps/rejected": -860.0, + "loss": 0.2573, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.203125, + "rewards/margins": 2.046875, + "rewards/rejected": -5.25, + "step": 770 + }, + { + "epoch": 1.6182572614107884, + "grad_norm": 19.560666870988193, + "learning_rate": 2.5935498030214397e-07, + "logits/chosen": -1.3515625, + "logits/rejected": -1.40625, + "logps/chosen": -700.0, + "logps/rejected": -860.0, + "loss": 0.2086, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -3.0, + "rewards/margins": 1.8828125, + "rewards/rejected": -4.875, + "step": 780 + }, + { + "epoch": 1.6390041493775933, + "grad_norm": 21.195930951645135, + "learning_rate": 2.533201867095504e-07, + "logits/chosen": -1.375, + "logits/rejected": -1.34375, + "logps/chosen": -756.0, + "logps/rejected": -1048.0, + "loss": 0.2734, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.53125, + "rewards/margins": 3.640625, + "rewards/rejected": -7.1875, + "step": 790 + }, + { + "epoch": 1.6597510373443982, + "grad_norm": 20.462891895112204, + "learning_rate": 2.472834572007493e-07, + "logits/chosen": -1.4140625, + "logits/rejected": -1.4765625, + "logps/chosen": -728.0, + "logps/rejected": -872.0, + "loss": 0.2306, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.421875, + "rewards/margins": 2.84375, + "rewards/rejected": -5.28125, + "step": 800 + }, + { + "epoch": 1.6804979253112033, + "grad_norm": 23.905476075586677, + "learning_rate": 2.4124831163878427e-07, + "logits/chosen": -1.234375, + "logits/rejected": -1.3125, + "logps/chosen": -700.0, + "logps/rejected": -936.0, + "loss": 0.2081, + "rewards/accuracies": 1.0, + 
"rewards/chosen": -3.3125, + "rewards/margins": 2.609375, + "rewards/rejected": -5.9375, + "step": 810 + }, + { + "epoch": 1.7012448132780082, + "grad_norm": 23.605115040621836, + "learning_rate": 2.3521826896313965e-07, + "logits/chosen": -1.2734375, + "logits/rejected": -1.4140625, + "logps/chosen": -704.0, + "logps/rejected": -1104.0, + "loss": 0.2039, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.59375, + "rewards/margins": 4.0, + "rewards/rejected": -7.59375, + "step": 820 + }, + { + "epoch": 1.7219917012448134, + "grad_norm": 25.173345988155088, + "learning_rate": 2.2919684513793704e-07, + "logits/chosen": -1.2421875, + "logits/rejected": -1.4296875, + "logps/chosen": -736.0, + "logps/rejected": -972.0, + "loss": 0.2168, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.609375, + "rewards/margins": 2.84375, + "rewards/rejected": -6.4375, + "step": 830 + }, + { + "epoch": 1.7427385892116183, + "grad_norm": 18.48915801624125, + "learning_rate": 2.2318755110186602e-07, + "logits/chosen": -1.2734375, + "logits/rejected": -1.3671875, + "logps/chosen": -688.0, + "logps/rejected": -828.0, + "loss": 0.2236, + "rewards/accuracies": 1.0, + "rewards/chosen": -2.4375, + "rewards/margins": 2.546875, + "rewards/rejected": -5.0, + "step": 840 + }, + { + "epoch": 1.7634854771784232, + "grad_norm": 18.093111350178464, + "learning_rate": 2.171938907210457e-07, + "logits/chosen": -1.2421875, + "logits/rejected": -1.2109375, + "logps/chosen": -632.0, + "logps/rejected": -904.0, + "loss": 0.203, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -2.890625, + "rewards/margins": 2.390625, + "rewards/rejected": -5.28125, + "step": 850 + }, + { + "epoch": 1.784232365145228, + "grad_norm": 19.412483996631583, + "learning_rate": 2.1121935874600914e-07, + "logits/chosen": -1.2265625, + "logits/rejected": -1.3203125, + "logps/chosen": -704.0, + "logps/rejected": -944.0, + "loss": 0.2065, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.25, + 
"rewards/margins": 2.71875, + "rewards/rejected": -5.96875, + "step": 860 + }, + { + "epoch": 1.8049792531120332, + "grad_norm": 27.072030223868076, + "learning_rate": 2.052674387740039e-07, + "logits/chosen": -1.34375, + "logits/rejected": -1.3046875, + "logps/chosen": -736.0, + "logps/rejected": -1008.0, + "loss": 0.2191, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.5, + "rewards/margins": 2.875, + "rewards/rejected": -6.375, + "step": 870 + }, + { + "epoch": 1.8257261410788381, + "grad_norm": 23.115028154031677, + "learning_rate": 1.9934160121779511e-07, + "logits/chosen": -1.140625, + "logits/rejected": -1.25, + "logps/chosen": -812.0, + "logps/rejected": -1000.0, + "loss": 0.2042, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.765625, + "rewards/margins": 2.296875, + "rewards/rejected": -6.0625, + "step": 880 + }, + { + "epoch": 1.8464730290456433, + "grad_norm": 21.41388111371229, + "learning_rate": 1.9344530128215644e-07, + "logits/chosen": -1.2734375, + "logits/rejected": -1.3515625, + "logps/chosen": -752.0, + "logps/rejected": -852.0, + "loss": 0.2198, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.59375, + "rewards/margins": 2.453125, + "rewards/rejected": -5.03125, + "step": 890 + }, + { + "epoch": 1.8672199170124482, + "grad_norm": 21.253233874014462, + "learning_rate": 1.8758197694922812e-07, + "logits/chosen": -1.25, + "logits/rejected": -1.3671875, + "logps/chosen": -740.0, + "logps/rejected": -944.0, + "loss": 0.2285, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -2.953125, + "rewards/margins": 2.078125, + "rewards/rejected": -5.03125, + "step": 900 + }, + { + "epoch": 1.887966804979253, + "grad_norm": 20.368884562679153, + "learning_rate": 1.8175504697391728e-07, + "logits/chosen": -1.265625, + "logits/rejected": -1.296875, + "logps/chosen": -852.0, + "logps/rejected": -968.0, + "loss": 0.1723, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -3.953125, + 
"rewards/margins": 2.015625, + "rewards/rejected": -5.96875, + "step": 910 + }, + { + "epoch": 1.908713692946058, + "grad_norm": 16.864214805797634, + "learning_rate": 1.7596790889050907e-07, + "logits/chosen": -1.34375, + "logits/rejected": -1.34375, + "logps/chosen": -720.0, + "logps/rejected": -940.0, + "loss": 0.1957, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.0625, + "rewards/margins": 2.78125, + "rewards/rejected": -5.84375, + "step": 920 + }, + { + "epoch": 1.929460580912863, + "grad_norm": 16.073322912809243, + "learning_rate": 1.702239370316515e-07, + "logits/chosen": -1.2578125, + "logits/rejected": -1.34375, + "logps/chosen": -792.0, + "logps/rejected": -1056.0, + "loss": 0.1968, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.5, + "rewards/margins": 3.4375, + "rewards/rejected": -6.9375, + "step": 930 + }, + { + "epoch": 1.950207468879668, + "grad_norm": 22.36544108559203, + "learning_rate": 1.645264805608674e-07, + "logits/chosen": -1.3046875, + "logits/rejected": -1.2890625, + "logps/chosen": -816.0, + "logps/rejected": -1032.0, + "loss": 0.1829, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -3.75, + "rewards/margins": 2.296875, + "rewards/rejected": -6.0625, + "step": 940 + }, + { + "epoch": 1.9709543568464731, + "grad_norm": 27.387195026328936, + "learning_rate": 1.58878861519743e-07, + "logits/chosen": -1.21875, + "logits/rejected": -1.28125, + "logps/chosen": -624.0, + "logps/rejected": -928.0, + "loss": 0.2129, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.125, + "rewards/margins": 2.9375, + "rewards/rejected": -6.0625, + "step": 950 + }, + { + "epoch": 1.991701244813278, + "grad_norm": 36.05924674346635, + "learning_rate": 1.5328437289093015e-07, + "logits/chosen": -1.3203125, + "logits/rejected": -1.3515625, + "logps/chosen": -712.0, + "logps/rejected": -1020.0, + "loss": 0.1837, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -3.65625, + "rewards/margins": 2.78125, + 
"rewards/rejected": -6.4375, + "step": 960 + }, + { + "epoch": 2.012448132780083, + "grad_norm": 9.806579172400033, + "learning_rate": 1.4774627667809223e-07, + "logits/chosen": -1.3046875, + "logits/rejected": -1.3515625, + "logps/chosen": -700.0, + "logps/rejected": -1008.0, + "loss": 0.1358, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.171875, + "rewards/margins": 3.421875, + "rewards/rejected": -6.5625, + "step": 970 + }, + { + "epoch": 2.033195020746888, + "grad_norm": 11.092351348103373, + "learning_rate": 1.4226780200391267e-07, + "logits/chosen": -0.91796875, + "logits/rejected": -1.0, + "logps/chosen": -756.0, + "logps/rejected": -1160.0, + "loss": 0.0784, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.0625, + "rewards/margins": 3.78125, + "rewards/rejected": -7.84375, + "step": 980 + }, + { + "epoch": 2.0539419087136928, + "grad_norm": 10.816346221719781, + "learning_rate": 1.3685214322727596e-07, + "logits/chosen": -1.046875, + "logits/rejected": -1.203125, + "logps/chosen": -880.0, + "logps/rejected": -1272.0, + "loss": 0.0719, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.59375, + "rewards/margins": 4.875, + "rewards/rejected": -9.4375, + "step": 990 + }, + { + "epoch": 2.074688796680498, + "grad_norm": 10.729589636729887, + "learning_rate": 1.3150245808071854e-07, + "logits/chosen": -1.265625, + "logits/rejected": -1.265625, + "logps/chosen": -1020.0, + "logps/rejected": -1288.0, + "loss": 0.0733, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.21875, + "rewards/margins": 3.71875, + "rewards/rejected": -8.9375, + "step": 1000 + }, + { + "epoch": 2.095435684647303, + "grad_norm": 7.237413191543059, + "learning_rate": 1.2622186582923566e-07, + "logits/chosen": -1.203125, + "logits/rejected": -1.28125, + "logps/chosen": -996.0, + "logps/rejected": -1264.0, + "loss": 0.0756, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.53125, + "rewards/margins": 3.25, + "rewards/rejected": -7.78125, + "step": 1010 + }, + { + "epoch": 
2.116182572614108, + "grad_norm": 14.390311697685057, + "learning_rate": 1.2101344545151713e-07, + "logits/chosen": -1.234375, + "logits/rejected": -1.2890625, + "logps/chosen": -856.0, + "logps/rejected": -1168.0, + "loss": 0.0688, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.71875, + "rewards/margins": 3.578125, + "rewards/rejected": -8.3125, + "step": 1020 + }, + { + "epoch": 2.136929460580913, + "grad_norm": 9.371742649196001, + "learning_rate": 1.1588023384467335e-07, + "logits/chosen": -1.2578125, + "logits/rejected": -1.34375, + "logps/chosen": -848.0, + "logps/rejected": -1240.0, + "loss": 0.0873, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.78125, + "rewards/margins": 4.53125, + "rewards/rejected": -9.3125, + "step": 1030 + }, + { + "epoch": 2.1576763485477177, + "grad_norm": 13.411003854877661, + "learning_rate": 1.1082522405349834e-07, + "logits/chosen": -1.21875, + "logits/rejected": -1.2734375, + "logps/chosen": -744.0, + "logps/rejected": -1144.0, + "loss": 0.0694, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.546875, + "rewards/margins": 4.875, + "rewards/rejected": -8.4375, + "step": 1040 + }, + { + "epoch": 2.1784232365145226, + "grad_norm": 10.33738561356746, + "learning_rate": 1.0585136352530172e-07, + "logits/chosen": -1.4296875, + "logits/rejected": -1.484375, + "logps/chosen": -876.0, + "logps/rejected": -1184.0, + "loss": 0.0812, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -4.5625, + "rewards/margins": 3.015625, + "rewards/rejected": -7.59375, + "step": 1050 + }, + { + "epoch": 2.199170124481328, + "grad_norm": 15.189705649161667, + "learning_rate": 1.0096155239132675e-07, + "logits/chosen": -1.296875, + "logits/rejected": -1.328125, + "logps/chosen": -672.0, + "logps/rejected": -924.0, + "loss": 0.0763, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.625, + "rewards/margins": 3.21875, + "rewards/rejected": -6.84375, + "step": 1060 + }, + { + "epoch": 2.219917012448133, + "grad_norm": 
9.763294237607763, + "learning_rate": 9.615864177575836e-08, + "logits/chosen": -1.2109375, + "logits/rejected": -1.3125, + "logps/chosen": -1112.0, + "logps/rejected": -1456.0, + "loss": 0.0735, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.53125, + "rewards/margins": 5.21875, + "rewards/rejected": -10.75, + "step": 1070 + }, + { + "epoch": 2.240663900414938, + "grad_norm": 12.16820558108209, + "learning_rate": 9.144543213330493e-08, + "logits/chosen": -1.390625, + "logits/rejected": -1.390625, + "logps/chosen": -848.0, + "logps/rejected": -1296.0, + "loss": 0.0737, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.3125, + "rewards/margins": 4.9375, + "rewards/rejected": -9.25, + "step": 1080 + }, + { + "epoch": 2.2614107883817427, + "grad_norm": 13.349946012287942, + "learning_rate": 8.682467161632508e-08, + "logits/chosen": -1.3359375, + "logits/rejected": -1.421875, + "logps/chosen": -764.0, + "logps/rejected": -1216.0, + "loss": 0.0706, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.25, + "rewards/margins": 4.34375, + "rewards/rejected": -8.5625, + "step": 1090 + }, + { + "epoch": 2.2821576763485476, + "grad_norm": 20.08687451523014, + "learning_rate": 8.229905447244942e-08, + "logits/chosen": -1.328125, + "logits/rejected": -1.3828125, + "logps/chosen": -908.0, + "logps/rejected": -1176.0, + "loss": 0.0642, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.4375, + "rewards/margins": 3.53125, + "rewards/rejected": -8.0, + "step": 1100 + }, + { + "epoch": 2.3029045643153525, + "grad_norm": 16.153630097446893, + "learning_rate": 7.787121947363393e-08, + "logits/chosen": -1.1171875, + "logits/rejected": -1.1953125, + "logps/chosen": -964.0, + "logps/rejected": -1360.0, + "loss": 0.0691, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.21875, + "rewards/margins": 4.34375, + "rewards/rejected": -9.5625, + "step": 1110 + }, + { + "epoch": 2.323651452282158, + "grad_norm": 14.370101853953887, + "learning_rate": 7.354374837755919e-08, + 
"logits/chosen": -1.1328125, + "logits/rejected": -1.21875, + "logps/chosen": -892.0, + "logps/rejected": -1272.0, + "loss": 0.0749, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8125, + "rewards/margins": 4.625, + "rewards/rejected": -9.4375, + "step": 1120 + }, + { + "epoch": 2.3443983402489628, + "grad_norm": 15.871062321763814, + "learning_rate": 6.931916442227335e-08, + "logits/chosen": -1.28125, + "logits/rejected": -1.28125, + "logps/chosen": -808.0, + "logps/rejected": -1184.0, + "loss": 0.068, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.34375, + "rewards/margins": 4.1875, + "rewards/rejected": -8.5, + "step": 1130 + }, + { + "epoch": 2.3651452282157677, + "grad_norm": 8.312067272107306, + "learning_rate": 6.519993085495622e-08, + "logits/chosen": -1.1640625, + "logits/rejected": -1.3046875, + "logps/chosen": -928.0, + "logps/rejected": -1440.0, + "loss": 0.0656, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.96875, + "rewards/margins": 5.375, + "rewards/rejected": -10.375, + "step": 1140 + }, + { + "epoch": 2.3858921161825726, + "grad_norm": 10.430795094415418, + "learning_rate": 6.118844949566293e-08, + "logits/chosen": -1.2578125, + "logits/rejected": -1.3515625, + "logps/chosen": -900.0, + "logps/rejected": -1384.0, + "loss": 0.0564, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.875, + "rewards/margins": 5.71875, + "rewards/rejected": -10.5625, + "step": 1150 + }, + { + "epoch": 2.4066390041493775, + "grad_norm": 22.177489465859505, + "learning_rate": 5.728705933688349e-08, + "logits/chosen": -1.2265625, + "logits/rejected": -1.2890625, + "logps/chosen": -908.0, + "logps/rejected": -1344.0, + "loss": 0.0803, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.875, + "rewards/margins": 4.28125, + "rewards/rejected": -9.125, + "step": 1160 + }, + { + "epoch": 2.4273858921161824, + "grad_norm": 6.857181586976422, + "learning_rate": 5.3498035179736475e-08, + "logits/chosen": -1.21875, + "logits/rejected": -1.3203125, + 
"logps/chosen": -768.0, + "logps/rejected": -1144.0, + "loss": 0.0637, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.65625, + "rewards/margins": 3.875, + "rewards/rejected": -8.5625, + "step": 1170 + }, + { + "epoch": 2.4481327800829877, + "grad_norm": 18.641976214083737, + "learning_rate": 4.98235863075899e-08, + "logits/chosen": -1.203125, + "logits/rejected": -1.234375, + "logps/chosen": -844.0, + "logps/rejected": -1304.0, + "loss": 0.0823, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -4.75, + "rewards/margins": 4.75, + "rewards/rejected": -9.5, + "step": 1180 + }, + { + "epoch": 2.4688796680497926, + "grad_norm": 11.12170347340474, + "learning_rate": 4.626585519788476e-08, + "logits/chosen": -1.28125, + "logits/rejected": -1.28125, + "logps/chosen": -776.0, + "logps/rejected": -1272.0, + "loss": 0.0621, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.859375, + "rewards/margins": 5.21875, + "rewards/rejected": -9.0625, + "step": 1190 + }, + { + "epoch": 2.4896265560165975, + "grad_norm": 10.533918362287494, + "learning_rate": 4.2826916272911154e-08, + "logits/chosen": -1.1640625, + "logits/rejected": -1.2890625, + "logps/chosen": -916.0, + "logps/rejected": -1256.0, + "loss": 0.0647, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.96875, + "rewards/margins": 4.09375, + "rewards/rejected": -9.0625, + "step": 1200 + }, + { + "epoch": 2.5103734439834025, + "grad_norm": 11.17720015590518, + "learning_rate": 3.950877469026523e-08, + "logits/chosen": -1.2734375, + "logits/rejected": -1.3046875, + "logps/chosen": -1128.0, + "logps/rejected": -1528.0, + "loss": 0.0642, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.625, + "rewards/margins": 5.1875, + "rewards/rejected": -10.8125, + "step": 1210 + }, + { + "epoch": 2.5311203319502074, + "grad_norm": 19.957831888824806, + "learning_rate": 3.631336517369313e-08, + "logits/chosen": -1.1953125, + "logits/rejected": -1.328125, + "logps/chosen": -768.0, + "logps/rejected": 
-1160.0, + "loss": 0.0622, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.28125, + "rewards/margins": 4.125, + "rewards/rejected": -8.4375, + "step": 1220 + }, + { + "epoch": 2.5518672199170123, + "grad_norm": 17.996461125043027, + "learning_rate": 3.3242550885002805e-08, + "logits/chosen": -1.1953125, + "logits/rejected": -1.2421875, + "logps/chosen": -1008.0, + "logps/rejected": -1400.0, + "loss": 0.0673, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0, + "rewards/margins": 5.03125, + "rewards/rejected": -10.0, + "step": 1230 + }, + { + "epoch": 2.572614107883817, + "grad_norm": 12.137212294540161, + "learning_rate": 3.029812233770215e-08, + "logits/chosen": -1.2421875, + "logits/rejected": -1.234375, + "logps/chosen": -776.0, + "logps/rejected": -1112.0, + "loss": 0.0709, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.03125, + "rewards/margins": 3.65625, + "rewards/rejected": -7.6875, + "step": 1240 + }, + { + "epoch": 2.5933609958506225, + "grad_norm": 6.03151820091559, + "learning_rate": 2.74817963529958e-08, + "logits/chosen": -1.1171875, + "logits/rejected": -1.171875, + "logps/chosen": -672.0, + "logps/rejected": -1160.0, + "loss": 0.0584, + "rewards/accuracies": 1.0, + "rewards/chosen": -3.765625, + "rewards/margins": 4.8125, + "rewards/rejected": -8.5625, + "step": 1250 + }, + { + "epoch": 2.6141078838174274, + "grad_norm": 25.243724218800992, + "learning_rate": 2.479521505875079e-08, + "logits/chosen": -1.265625, + "logits/rejected": -1.2578125, + "logps/chosen": -976.0, + "logps/rejected": -1352.0, + "loss": 0.0643, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.53125, + "rewards/margins": 4.8125, + "rewards/rejected": -10.375, + "step": 1260 + }, + { + "epoch": 2.6348547717842323, + "grad_norm": 9.357215641019858, + "learning_rate": 2.223994493201342e-08, + "logits/chosen": -1.2265625, + "logits/rejected": -1.3125, + "logps/chosen": -888.0, + "logps/rejected": -1272.0, + "loss": 0.0645, + 
"rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.84375, + "rewards/margins": 4.15625, + "rewards/rejected": -9.0, + "step": 1270 + }, + { + "epoch": 2.6556016597510372, + "grad_norm": 11.426915475943359, + "learning_rate": 1.9817475885636868e-08, + "logits/chosen": -1.109375, + "logits/rejected": -1.265625, + "logps/chosen": -924.0, + "logps/rejected": -1344.0, + "loss": 0.0652, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.8125, + "rewards/margins": 4.90625, + "rewards/rejected": -9.6875, + "step": 1280 + }, + { + "epoch": 2.6763485477178426, + "grad_norm": 13.561447852898182, + "learning_rate": 1.7529220399550376e-08, + "logits/chosen": -1.140625, + "logits/rejected": -1.25, + "logps/chosen": -988.0, + "logps/rejected": -1472.0, + "loss": 0.0567, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.625, + "rewards/margins": 5.28125, + "rewards/rejected": -10.875, + "step": 1290 + }, + { + "epoch": 2.6970954356846475, + "grad_norm": 8.09337248601455, + "learning_rate": 1.5376512697178713e-08, + "logits/chosen": -1.234375, + "logits/rejected": -1.2265625, + "logps/chosen": -824.0, + "logps/rejected": -1224.0, + "loss": 0.063, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5625, + "rewards/margins": 4.71875, + "rewards/rejected": -9.25, + "step": 1300 + }, + { + "epoch": 2.7178423236514524, + "grad_norm": 16.861726238832322, + "learning_rate": 1.3360607967490307e-08, + "logits/chosen": -1.1640625, + "logits/rejected": -1.15625, + "logps/chosen": -1048.0, + "logps/rejected": -1400.0, + "loss": 0.0647, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.78125, + "rewards/margins": 4.34375, + "rewards/rejected": -10.125, + "step": 1310 + }, + { + "epoch": 2.7385892116182573, + "grad_norm": 20.632985201134645, + "learning_rate": 1.1482681633128738e-08, + "logits/chosen": -1.3046875, + "logits/rejected": -1.3515625, + "logps/chosen": -920.0, + "logps/rejected": -1200.0, + "loss": 0.0816, + "rewards/accuracies": 0.949999988079071, + 
"rewards/chosen": -4.40625, + "rewards/margins": 3.65625, + "rewards/rejected": -8.0625, + "step": 1320 + }, + { + "epoch": 2.759336099585062, + "grad_norm": 9.84758156918914, + "learning_rate": 9.743828665053466e-09, + "logits/chosen": -1.1796875, + "logits/rejected": -1.203125, + "logps/chosen": -868.0, + "logps/rejected": -1296.0, + "loss": 0.05, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.625, + "rewards/margins": 5.0, + "rewards/rejected": -9.625, + "step": 1330 + }, + { + "epoch": 2.780082987551867, + "grad_norm": 11.328737169791557, + "learning_rate": 8.145062944090425e-09, + "logits/chosen": -1.2578125, + "logits/rejected": -1.1953125, + "logps/chosen": -928.0, + "logps/rejected": -1368.0, + "loss": 0.0804, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0, + "rewards/margins": 4.25, + "rewards/rejected": -9.25, + "step": 1340 + }, + { + "epoch": 2.800829875518672, + "grad_norm": 24.41038502693378, + "learning_rate": 6.687316669763937e-09, + "logits/chosen": -1.2265625, + "logits/rejected": -1.3125, + "logps/chosen": -808.0, + "logps/rejected": -1200.0, + "loss": 0.0622, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.46875, + "rewards/margins": 4.28125, + "rewards/rejected": -8.75, + "step": 1350 + }, + { + "epoch": 2.821576763485477, + "grad_norm": 15.889369514063155, + "learning_rate": 5.371439816754892e-09, + "logits/chosen": -1.1640625, + "logits/rejected": -1.234375, + "logps/chosen": -856.0, + "logps/rejected": -1136.0, + "loss": 0.0684, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -4.96875, + "rewards/margins": 3.5, + "rewards/rejected": -8.4375, + "step": 1360 + }, + { + "epoch": 2.8423236514522823, + "grad_norm": 13.23599572717501, + "learning_rate": 4.198199639302152e-09, + "logits/chosen": -1.1796875, + "logits/rejected": -1.21875, + "logps/chosen": -800.0, + "logps/rejected": -1280.0, + "loss": 0.0608, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.59375, + "rewards/margins": 4.71875, + 
"rewards/rejected": -9.3125, + "step": 1370 + }, + { + "epoch": 2.863070539419087, + "grad_norm": 9.4244826040974, + "learning_rate": 3.1682802238362506e-09, + "logits/chosen": -1.1953125, + "logits/rejected": -1.2578125, + "logps/chosen": -820.0, + "logps/rejected": -1280.0, + "loss": 0.0549, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.40625, + "rewards/margins": 4.59375, + "rewards/rejected": -9.0, + "step": 1380 + }, + { + "epoch": 2.883817427385892, + "grad_norm": 9.572805205888423, + "learning_rate": 2.2822820901060025e-09, + "logits/chosen": -1.3046875, + "logits/rejected": -1.2265625, + "logps/chosen": -920.0, + "logps/rejected": -1168.0, + "loss": 0.0583, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.5625, + "rewards/margins": 4.0625, + "rewards/rejected": -8.625, + "step": 1390 + }, + { + "epoch": 2.904564315352697, + "grad_norm": 5.8710007402955835, + "learning_rate": 1.5407218410307398e-09, + "logits/chosen": -1.3125, + "logits/rejected": -1.3828125, + "logps/chosen": -824.0, + "logps/rejected": -1168.0, + "loss": 0.0627, + "rewards/accuracies": 0.8999999761581421, + "rewards/chosen": -5.09375, + "rewards/margins": 3.34375, + "rewards/rejected": -8.4375, + "step": 1400 + }, + { + "epoch": 2.9253112033195023, + "grad_norm": 8.81585615187788, + "learning_rate": 9.440318614823417e-10, + "logits/chosen": -1.234375, + "logits/rejected": -1.2421875, + "logps/chosen": -788.0, + "logps/rejected": -1128.0, + "loss": 0.0704, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.34375, + "rewards/margins": 3.9375, + "rewards/rejected": -8.3125, + "step": 1410 + }, + { + "epoch": 2.9460580912863072, + "grad_norm": 8.442496137212597, + "learning_rate": 4.925600661726537e-10, + "logits/chosen": -1.2421875, + "logits/rejected": -1.3359375, + "logps/chosen": -916.0, + "logps/rejected": -1320.0, + "loss": 0.0613, + "rewards/accuracies": 1.0, + "rewards/chosen": -4.46875, + "rewards/margins": 4.84375, + "rewards/rejected": -9.3125, + "step": 1420 
+ }, + { + "epoch": 2.966804979253112, + "grad_norm": 14.014913089264976, + "learning_rate": 1.8656969679323176e-10, + "logits/chosen": -1.2734375, + "logits/rejected": -1.2890625, + "logps/chosen": -936.0, + "logps/rejected": -1256.0, + "loss": 0.0667, + "rewards/accuracies": 1.0, + "rewards/chosen": -5.0, + "rewards/margins": 4.40625, + "rewards/rejected": -9.375, + "step": 1430 + }, + { + "epoch": 2.987551867219917, + "grad_norm": 8.079064512428207, + "learning_rate": 2.6239168525898915e-11, + "logits/chosen": -1.25, + "logits/rejected": -1.1328125, + "logps/chosen": -860.0, + "logps/rejected": -1152.0, + "loss": 0.0733, + "rewards/accuracies": 0.949999988079071, + "rewards/chosen": -4.65625, + "rewards/margins": 3.859375, + "rewards/rejected": -8.5, + "step": 1440 + }, + { + "epoch": 3.0, + "step": 1446, + "total_flos": 0.0, + "train_loss": 0.29230043576466097, + "train_runtime": 30177.164, + "train_samples_per_second": 3.062, + "train_steps_per_second": 0.048 + } + ], + "logging_steps": 10, + "max_steps": 1446, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..8ecda08 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c722a48a4d5fd467448daf73df9ba4886621e51a918de71dbc6b87a59f0d7eac +size 7672