初始化项目,由ModelHub XC社区提供模型
Model: li-muyang/zephyr-8b-dpo-full Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
80
README.md
Normal file
80
README.md
Normal file
@@ -0,0 +1,80 @@
|
||||
---
|
||||
library_name: transformers
|
||||
license: llama3.1
|
||||
base_model: meta-llama/Llama-3.1-8B
|
||||
tags:
|
||||
- alignment-handbook
|
||||
- trl
|
||||
- dpo
|
||||
- generated_from_trainer
|
||||
- trl
|
||||
- dpo
|
||||
- generated_from_trainer
|
||||
datasets:
|
||||
- HuggingFaceH4/ultrafeedback_binarized
|
||||
model-index:
|
||||
- name: zephyr-8b-dpo-full
|
||||
results: []
|
||||
---
|
||||
|
||||
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
||||
should probably proofread and complete it, then remove this comment. -->
|
||||
|
||||
# zephyr-8b-dpo-full
|
||||
|
||||
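This model is a fine-tuned version of [meta-llama/Llama-3.1-8B](https://huggingface.co/meta-llama/Llama-3.1-8B), trained with Direct Preference Optimization (DPO) on the [HuggingFaceH4/ultrafeedback_binarized](https://huggingface.co/datasets/HuggingFaceH4/ultrafeedback_binarized) dataset.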
|
||||
It achieves the following results on the evaluation set:
|
||||
- Loss: 0.5372
|
||||
- Rewards/chosen: -1.1911
|
||||
- Rewards/rejected: -2.0021
|
||||
- Rewards/accuracies: 0.7656
|
||||
- Rewards/margins: 0.8110
|
||||
- Logps/rejected: -481.7125
|
||||
- Logps/chosen: -401.6271
|
||||
- Logits/rejected: -0.5622
|
||||
- Logits/chosen: -0.5978
|
||||
|
||||
## Model description
|
||||
|
||||
More information needed
|
||||
|
||||
## Intended uses & limitations
|
||||
|
||||
More information needed
|
||||
|
||||
## Training and evaluation data
|
||||
|
||||
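The model was trained and evaluated on the HuggingFaceH4/ultrafeedback_binarized preference dataset: 61,134 training samples and 2,000 evaluation samples, as recorded in `all_results.json`. The reported evaluation metrics were measured at step 477 (epoch 0.9984).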
|
||||
|
||||
## Training procedure
|
||||
|
||||
### Training hyperparameters
|
||||
|
||||
The following hyperparameters were used during training:
|
||||
- learning_rate: 5e-07
|
||||
- train_batch_size: 4
|
||||
- eval_batch_size: 8
|
||||
- seed: 42
|
||||
- distributed_type: multi-GPU
|
||||
- num_devices: 8
|
||||
- gradient_accumulation_steps: 4
|
||||
- total_train_batch_size: 128
|
||||
- total_eval_batch_size: 64
|
||||
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
||||
- lr_scheduler_type: cosine
|
||||
- lr_scheduler_warmup_ratio: 0.1
|
||||
- num_epochs: 1
|
||||
|
||||
### Training results
|
||||
|
||||
| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
|
||||
|:-------------:|:------:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
|
||||
| 0.5134 | 0.9984 | 477 | 0.5372 | -1.1911 | -2.0021 | 0.7656 | 0.8110 | -481.7125 | -401.6271 | -0.5622 | -0.5978 |
|
||||
|
||||
|
||||
### Framework versions
|
||||
|
||||
- Transformers 4.45.2
|
||||
- Pytorch 2.5.1+rocm6.2
|
||||
- Datasets 3.2.0
|
||||
- Tokenizers 0.20.3
|
||||
22
all_results.json
Normal file
22
all_results.json
Normal file
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"epoch": 0.9984301412872841,
|
||||
"eval_logits/chosen": -0.5977884531021118,
|
||||
"eval_logits/rejected": -0.5622259378433228,
|
||||
"eval_logps/chosen": -401.6270751953125,
|
||||
"eval_logps/rejected": -481.71246337890625,
|
||||
"eval_loss": 0.5371974110603333,
|
||||
"eval_rewards/accuracies": 0.765625,
|
||||
"eval_rewards/chosen": -1.1911048889160156,
|
||||
"eval_rewards/margins": 0.8110275268554688,
|
||||
"eval_rewards/rejected": -2.0021324157714844,
|
||||
"eval_runtime": 186.7025,
|
||||
"eval_samples": 2000,
|
||||
"eval_samples_per_second": 10.712,
|
||||
"eval_steps_per_second": 0.171,
|
||||
"total_flos": 0.0,
|
||||
"train_loss": 0.5847830807387954,
|
||||
"train_runtime": 56722.3251,
|
||||
"train_samples": 61134,
|
||||
"train_samples_per_second": 1.078,
|
||||
"train_steps_per_second": 0.008
|
||||
}
|
||||
36
config.json
Normal file
36
config.json
Normal file
@@ -0,0 +1,36 @@
|
||||
{
|
||||
"_name_or_path": "meta-llama/Llama-3.1-8B",
|
||||
"architectures": [
|
||||
"LlamaForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 128000,
|
||||
"eos_token_id": 128001,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 4096,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 14336,
|
||||
"max_position_embeddings": 131072,
|
||||
"mlp_bias": false,
|
||||
"model_type": "llama",
|
||||
"num_attention_heads": 32,
|
||||
"num_hidden_layers": 32,
|
||||
"num_key_value_heads": 8,
|
||||
"pretraining_tp": 1,
|
||||
"rms_norm_eps": 1e-05,
|
||||
"rope_scaling": {
|
||||
"factor": 8.0,
|
||||
"high_freq_factor": 4.0,
|
||||
"low_freq_factor": 1.0,
|
||||
"original_max_position_embeddings": 8192,
|
||||
"rope_type": "llama3"
|
||||
},
|
||||
"rope_theta": 500000.0,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.45.2",
|
||||
"use_cache": true,
|
||||
"vocab_size": 128256
|
||||
}
|
||||
16
eval_results.json
Normal file
16
eval_results.json
Normal file
@@ -0,0 +1,16 @@
|
||||
{
|
||||
"epoch": 0.9984301412872841,
|
||||
"eval_logits/chosen": -0.5977884531021118,
|
||||
"eval_logits/rejected": -0.5622259378433228,
|
||||
"eval_logps/chosen": -401.6270751953125,
|
||||
"eval_logps/rejected": -481.71246337890625,
|
||||
"eval_loss": 0.5371974110603333,
|
||||
"eval_rewards/accuracies": 0.765625,
|
||||
"eval_rewards/chosen": -1.1911048889160156,
|
||||
"eval_rewards/margins": 0.8110275268554688,
|
||||
"eval_rewards/rejected": -2.0021324157714844,
|
||||
"eval_runtime": 186.7025,
|
||||
"eval_samples": 2000,
|
||||
"eval_samples_per_second": 10.712,
|
||||
"eval_steps_per_second": 0.171
|
||||
}
|
||||
9
generation_config.json
Normal file
9
generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"_from_model_config": true,
|
||||
"bos_token_id": 128000,
|
||||
"do_sample": true,
|
||||
"eos_token_id": 128001,
|
||||
"temperature": 0.6,
|
||||
"top_p": 0.9,
|
||||
"transformers_version": "4.45.2"
|
||||
}
|
||||
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:81e18d6c389551c6d4246435bd5c7181d4210322dae8b7cffae0dc30fc3e330f
|
||||
size 4976698672
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3394d9461e0d7681add0390dc927c3db331b817d209512b5112d54d09603bfdf
|
||||
size 4999802720
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:efffce6df94abdc29f5952dabb98f0c097d347f3eda2bee3559740b7552a339a
|
||||
size 4915916176
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:25f721c72772bcd958e88bcd9a12877b3707fc6bb6c54a6804587307160ab184
|
||||
size 1168138808
|
||||
298
model.safetensors.index.json
Normal file
298
model.safetensors.index.json
Normal file
@@ -0,0 +1,298 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 16060522496
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
17
special_tokens_map.json
Normal file
17
special_tokens_map.json
Normal file
@@ -0,0 +1,17 @@
|
||||
{
|
||||
"bos_token": {
|
||||
"content": "<|begin_of_text|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eos_token": {
|
||||
"content": "<|end_of_text|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": "<|end_of_text|>"
|
||||
}
|
||||
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
BIN
tokenizer.json
(Stored with Git LFS)
Normal file
Binary file not shown.
2063
tokenizer_config.json
Normal file
2063
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
9
train_results.json
Normal file
9
train_results.json
Normal file
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"epoch": 0.9984301412872841,
|
||||
"total_flos": 0.0,
|
||||
"train_loss": 0.5847830807387954,
|
||||
"train_runtime": 56722.3251,
|
||||
"train_samples": 61134,
|
||||
"train_samples_per_second": 1.078,
|
||||
"train_steps_per_second": 0.008
|
||||
}
|
||||
778
trainer_state.json
Normal file
778
trainer_state.json
Normal file
@@ -0,0 +1,778 @@
|
||||
{
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 0.9984301412872841,
|
||||
"eval_steps": 500,
|
||||
"global_step": 477,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"epoch": 0.0020931449502878076,
|
||||
"grad_norm": 4.5917708857834985,
|
||||
"learning_rate": 1.0416666666666666e-08,
|
||||
"logits/chosen": -0.8526347279548645,
|
||||
"logits/rejected": -0.7768423557281494,
|
||||
"logps/chosen": -363.13519287109375,
|
||||
"logps/rejected": -364.9631042480469,
|
||||
"loss": 0.6931,
|
||||
"rewards/accuracies": 0.0,
|
||||
"rewards/chosen": 0.0,
|
||||
"rewards/margins": 0.0,
|
||||
"rewards/rejected": 0.0,
|
||||
"step": 1
|
||||
},
|
||||
{
|
||||
"epoch": 0.020931449502878074,
|
||||
"grad_norm": 4.404674449441554,
|
||||
"learning_rate": 1.0416666666666667e-07,
|
||||
"logits/chosen": -0.7482305765151978,
|
||||
"logits/rejected": -0.7081854343414307,
|
||||
"logps/chosen": -311.2024841308594,
|
||||
"logps/rejected": -284.1365966796875,
|
||||
"loss": 0.6931,
|
||||
"rewards/accuracies": 0.4375,
|
||||
"rewards/chosen": 2.1014602680224925e-05,
|
||||
"rewards/margins": 8.458160300506279e-05,
|
||||
"rewards/rejected": -6.356705853249878e-05,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"epoch": 0.04186289900575615,
|
||||
"grad_norm": 4.2921416180433765,
|
||||
"learning_rate": 2.0833333333333333e-07,
|
||||
"logits/chosen": -0.7403894066810608,
|
||||
"logits/rejected": -0.6793709993362427,
|
||||
"logps/chosen": -324.6893005371094,
|
||||
"logps/rejected": -290.2327575683594,
|
||||
"loss": 0.693,
|
||||
"rewards/accuracies": 0.4749999940395355,
|
||||
"rewards/chosen": 0.0014148516347631812,
|
||||
"rewards/margins": 0.0002438486844766885,
|
||||
"rewards/rejected": 0.0011710028629750013,
|
||||
"step": 20
|
||||
},
|
||||
{
|
||||
"epoch": 0.06279434850863422,
|
||||
"grad_norm": 4.218970991450984,
|
||||
"learning_rate": 3.1249999999999997e-07,
|
||||
"logits/chosen": -0.7561457753181458,
|
||||
"logits/rejected": -0.7098526954650879,
|
||||
"logps/chosen": -295.118408203125,
|
||||
"logps/rejected": -255.83407592773438,
|
||||
"loss": 0.6917,
|
||||
"rewards/accuracies": 0.53125,
|
||||
"rewards/chosen": 0.008535891771316528,
|
||||
"rewards/margins": 0.0028298485558480024,
|
||||
"rewards/rejected": 0.005706042982637882,
|
||||
"step": 30
|
||||
},
|
||||
{
|
||||
"epoch": 0.0837257980115123,
|
||||
"grad_norm": 3.614368719783126,
|
||||
"learning_rate": 4.1666666666666667e-07,
|
||||
"logits/chosen": -0.7299980521202087,
|
||||
"logits/rejected": -0.6768942475318909,
|
||||
"logps/chosen": -267.0862121582031,
|
||||
"logps/rejected": -267.53863525390625,
|
||||
"loss": 0.6885,
|
||||
"rewards/accuracies": 0.625,
|
||||
"rewards/chosen": 0.020161841064691544,
|
||||
"rewards/margins": 0.008152564987540245,
|
||||
"rewards/rejected": 0.012009273283183575,
|
||||
"step": 40
|
||||
},
|
||||
{
|
||||
"epoch": 0.10465724751439037,
|
||||
"grad_norm": 3.6040620075273546,
|
||||
"learning_rate": 4.999731868769026e-07,
|
||||
"logits/chosen": -0.7151128053665161,
|
||||
"logits/rejected": -0.6647322177886963,
|
||||
"logps/chosen": -296.5942077636719,
|
||||
"logps/rejected": -277.5081787109375,
|
||||
"loss": 0.6833,
|
||||
"rewards/accuracies": 0.6187499761581421,
|
||||
"rewards/chosen": 0.0398605577647686,
|
||||
"rewards/margins": 0.02836265228688717,
|
||||
"rewards/rejected": 0.011497899889945984,
|
||||
"step": 50
|
||||
},
|
||||
{
|
||||
"epoch": 0.12558869701726844,
|
||||
"grad_norm": 3.5922190973220163,
|
||||
"learning_rate": 4.990353313429303e-07,
|
||||
"logits/chosen": -0.7289865016937256,
|
||||
"logits/rejected": -0.6785635352134705,
|
||||
"logps/chosen": -262.1878967285156,
|
||||
"logps/rejected": -253.5371856689453,
|
||||
"loss": 0.6783,
|
||||
"rewards/accuracies": 0.6812499761581421,
|
||||
"rewards/chosen": 0.0495939627289772,
|
||||
"rewards/margins": 0.042832277715206146,
|
||||
"rewards/rejected": 0.0067616915330290794,
|
||||
"step": 60
|
||||
},
|
||||
{
|
||||
"epoch": 0.14652014652014653,
|
||||
"grad_norm": 3.6371057840364927,
|
||||
"learning_rate": 4.967625656594781e-07,
|
||||
"logits/chosen": -0.6846636533737183,
|
||||
"logits/rejected": -0.6486319303512573,
|
||||
"logps/chosen": -304.8815002441406,
|
||||
"logps/rejected": -293.3005065917969,
|
||||
"loss": 0.6683,
|
||||
"rewards/accuracies": 0.606249988079071,
|
||||
"rewards/chosen": 0.014333389699459076,
|
||||
"rewards/margins": 0.050421230494976044,
|
||||
"rewards/rejected": -0.036087844520807266,
|
||||
"step": 70
|
||||
},
|
||||
{
|
||||
"epoch": 0.1674515960230246,
|
||||
"grad_norm": 4.641698206330642,
|
||||
"learning_rate": 4.93167072587771e-07,
|
||||
"logits/chosen": -0.7803142070770264,
|
||||
"logits/rejected": -0.6576212048530579,
|
||||
"logps/chosen": -338.9702453613281,
|
||||
"logps/rejected": -270.46124267578125,
|
||||
"loss": 0.6684,
|
||||
"rewards/accuracies": 0.643750011920929,
|
||||
"rewards/chosen": -0.041751302778720856,
|
||||
"rewards/margins": 0.06293781846761703,
|
||||
"rewards/rejected": -0.10468912124633789,
|
||||
"step": 80
|
||||
},
|
||||
{
|
||||
"epoch": 0.18838304552590268,
|
||||
"grad_norm": 5.809449166665223,
|
||||
"learning_rate": 4.882681251368548e-07,
|
||||
"logits/chosen": -0.730857253074646,
|
||||
"logits/rejected": -0.6790161728858948,
|
||||
"logps/chosen": -270.4856872558594,
|
||||
"logps/rejected": -280.201171875,
|
||||
"loss": 0.6533,
|
||||
"rewards/accuracies": 0.65625,
|
||||
"rewards/chosen": -0.05735975503921509,
|
||||
"rewards/margins": 0.0867304801940918,
|
||||
"rewards/rejected": -0.14409023523330688,
|
||||
"step": 90
|
||||
},
|
||||
{
|
||||
"epoch": 0.20931449502878074,
|
||||
"grad_norm": 6.629060471777615,
|
||||
"learning_rate": 4.820919832540181e-07,
|
||||
"logits/chosen": -0.819484531879425,
|
||||
"logits/rejected": -0.7444473505020142,
|
||||
"logps/chosen": -320.7297058105469,
|
||||
"logps/rejected": -315.49786376953125,
|
||||
"loss": 0.6452,
|
||||
"rewards/accuracies": 0.6499999761581421,
|
||||
"rewards/chosen": -0.106062151491642,
|
||||
"rewards/margins": 0.15797743201255798,
|
||||
"rewards/rejected": -0.2640395760536194,
|
||||
"step": 100
|
||||
},
|
||||
{
|
||||
"epoch": 0.2302459445316588,
|
||||
"grad_norm": 8.885427777088127,
|
||||
"learning_rate": 4.7467175306295647e-07,
|
||||
"logits/chosen": -0.7485495805740356,
|
||||
"logits/rejected": -0.6900595426559448,
|
||||
"logps/chosen": -313.8240966796875,
|
||||
"logps/rejected": -310.7196960449219,
|
||||
"loss": 0.6442,
|
||||
"rewards/accuracies": 0.6875,
|
||||
"rewards/chosen": -0.1286977082490921,
|
||||
"rewards/margins": 0.1550484299659729,
|
||||
"rewards/rejected": -0.2837461233139038,
|
||||
"step": 110
|
||||
},
|
||||
{
|
||||
"epoch": 0.25117739403453687,
|
||||
"grad_norm": 10.779857080720818,
|
||||
"learning_rate": 4.6604720940421207e-07,
|
||||
"logits/chosen": -0.6856316328048706,
|
||||
"logits/rejected": -0.6849483251571655,
|
||||
"logps/chosen": -303.8964538574219,
|
||||
"logps/rejected": -321.4309387207031,
|
||||
"loss": 0.6199,
|
||||
"rewards/accuracies": 0.706250011920929,
|
||||
"rewards/chosen": -0.2916944622993469,
|
||||
"rewards/margins": 0.21943287551403046,
|
||||
"rewards/rejected": -0.5111273527145386,
|
||||
"step": 120
|
||||
},
|
||||
{
|
||||
"epoch": 0.272108843537415,
|
||||
"grad_norm": 11.982309184139016,
|
||||
"learning_rate": 4.5626458262912735e-07,
|
||||
"logits/chosen": -0.6801525950431824,
|
||||
"logits/rejected": -0.6393054723739624,
|
||||
"logps/chosen": -319.20672607421875,
|
||||
"logps/rejected": -333.1614685058594,
|
||||
"loss": 0.609,
|
||||
"rewards/accuracies": 0.6625000238418579,
|
||||
"rewards/chosen": -0.27636662125587463,
|
||||
"rewards/margins": 0.2492021769285202,
|
||||
"rewards/rejected": -0.5255688428878784,
|
||||
"step": 130
|
||||
},
|
||||
{
|
||||
"epoch": 0.29304029304029305,
|
||||
"grad_norm": 19.151178395769573,
|
||||
"learning_rate": 4.453763107901675e-07,
|
||||
"logits/chosen": -0.7184507846832275,
|
||||
"logits/rejected": -0.6374621987342834,
|
||||
"logps/chosen": -356.2397766113281,
|
||||
"logps/rejected": -337.32354736328125,
|
||||
"loss": 0.6109,
|
||||
"rewards/accuracies": 0.65625,
|
||||
"rewards/chosen": -0.3082619607448578,
|
||||
"rewards/margins": 0.31162941455841064,
|
||||
"rewards/rejected": -0.6198914051055908,
|
||||
"step": 140
|
||||
},
|
||||
{
|
||||
"epoch": 0.3139717425431711,
|
||||
"grad_norm": 13.824909994267095,
|
||||
"learning_rate": 4.3344075855595097e-07,
|
||||
"logits/chosen": -0.6901696920394897,
|
||||
"logits/rejected": -0.6279430389404297,
|
||||
"logps/chosen": -353.95184326171875,
|
||||
"logps/rejected": -346.9781494140625,
|
||||
"loss": 0.6132,
|
||||
"rewards/accuracies": 0.675000011920929,
|
||||
"rewards/chosen": -0.6322077512741089,
|
||||
"rewards/margins": 0.2923499643802643,
|
||||
"rewards/rejected": -0.9245575666427612,
|
||||
"step": 150
|
||||
},
|
||||
{
|
||||
"epoch": 0.3349031920460492,
|
||||
"grad_norm": 13.009971235384292,
|
||||
"learning_rate": 4.2052190435769554e-07,
|
||||
"logits/chosen": -0.7091597318649292,
|
||||
"logits/rejected": -0.6455188989639282,
|
||||
"logps/chosen": -340.92657470703125,
|
||||
"logps/rejected": -347.0225830078125,
|
||||
"loss": 0.603,
|
||||
"rewards/accuracies": 0.706250011920929,
|
||||
"rewards/chosen": -0.5523598790168762,
|
||||
"rewards/margins": 0.33642885088920593,
|
||||
"rewards/rejected": -0.8887887001037598,
|
||||
"step": 160
|
||||
},
|
||||
{
|
||||
"epoch": 0.35583464154892724,
|
||||
"grad_norm": 13.829339790258013,
|
||||
"learning_rate": 4.0668899744407567e-07,
|
||||
"logits/chosen": -0.6223039627075195,
|
||||
"logits/rejected": -0.5784906148910522,
|
||||
"logps/chosen": -351.1839904785156,
|
||||
"logps/rejected": -358.60479736328125,
|
||||
"loss": 0.5953,
|
||||
"rewards/accuracies": 0.6625000238418579,
|
||||
"rewards/chosen": -0.8050142526626587,
|
||||
"rewards/margins": 0.3386740982532501,
|
||||
"rewards/rejected": -1.143688440322876,
|
||||
"step": 170
|
||||
},
|
||||
{
|
||||
"epoch": 0.37676609105180536,
|
||||
"grad_norm": 20.849086588528724,
|
||||
"learning_rate": 3.920161866827889e-07,
|
||||
"logits/chosen": -0.6424199342727661,
|
||||
"logits/rejected": -0.5930343270301819,
|
||||
"logps/chosen": -358.6197204589844,
|
||||
"logps/rejected": -367.137451171875,
|
||||
"loss": 0.5849,
|
||||
"rewards/accuracies": 0.6625000238418579,
|
||||
"rewards/chosen": -0.8172851800918579,
|
||||
"rewards/margins": 0.3052961528301239,
|
||||
"rewards/rejected": -1.1225812435150146,
|
||||
"step": 180
|
||||
},
|
||||
{
|
||||
"epoch": 0.3976975405546834,
|
||||
"grad_norm": 17.753231429350524,
|
||||
"learning_rate": 3.765821230985757e-07,
|
||||
"logits/chosen": -0.6292937994003296,
|
||||
"logits/rejected": -0.615179717540741,
|
||||
"logps/chosen": -343.19952392578125,
|
||||
"logps/rejected": -375.33929443359375,
|
||||
"loss": 0.5817,
|
||||
"rewards/accuracies": 0.6875,
|
||||
"rewards/chosen": -0.68468177318573,
|
||||
"rewards/margins": 0.34494417905807495,
|
||||
"rewards/rejected": -1.0296258926391602,
|
||||
"step": 190
|
||||
},
|
||||
{
|
||||
"epoch": 0.4186289900575615,
|
||||
"grad_norm": 28.151101969379706,
|
||||
"learning_rate": 3.604695382782159e-07,
|
||||
"logits/chosen": -0.5903419256210327,
|
||||
"logits/rejected": -0.5930633544921875,
|
||||
"logps/chosen": -360.65179443359375,
|
||||
"logps/rejected": -412.225830078125,
|
||||
"loss": 0.5821,
|
||||
"rewards/accuracies": 0.706250011920929,
|
||||
"rewards/chosen": -1.0000778436660767,
|
||||
"rewards/margins": 0.33244088292121887,
|
||||
"rewards/rejected": -1.3325188159942627,
|
||||
"step": 200
|
||||
},
|
||||
{
|
||||
"epoch": 0.43956043956043955,
|
||||
"grad_norm": 17.118814416105273,
|
||||
"learning_rate": 3.4376480090239047e-07,
|
||||
"logits/chosen": -0.6688283085823059,
|
||||
"logits/rejected": -0.5644041895866394,
|
||||
"logps/chosen": -433.589599609375,
|
||||
"logps/rejected": -432.74993896484375,
|
||||
"loss": 0.5853,
|
||||
"rewards/accuracies": 0.75,
|
||||
"rewards/chosen": -1.17396080493927,
|
||||
"rewards/margins": 0.4916624426841736,
|
||||
"rewards/rejected": -1.6656233072280884,
|
||||
"step": 210
|
||||
},
|
||||
{
|
||||
"epoch": 0.4604918890633176,
|
||||
"grad_norm": 23.06196255102623,
|
||||
"learning_rate": 3.265574537815398e-07,
|
||||
"logits/chosen": -0.5818850994110107,
|
||||
"logits/rejected": -0.5700303316116333,
|
||||
"logps/chosen": -351.98638916015625,
|
||||
"logps/rejected": -410.9193420410156,
|
||||
"loss": 0.5704,
|
||||
"rewards/accuracies": 0.6937500238418579,
|
||||
"rewards/chosen": -1.0643993616104126,
|
||||
"rewards/margins": 0.5418257713317871,
|
||||
"rewards/rejected": -1.6062252521514893,
|
||||
"step": 220
|
||||
},
|
||||
{
|
||||
"epoch": 0.48142333856619574,
|
||||
"grad_norm": 21.04733996523729,
|
||||
"learning_rate": 3.0893973387735683e-07,
|
||||
"logits/chosen": -0.6803761720657349,
|
||||
"logits/rejected": -0.6139528751373291,
|
||||
"logps/chosen": -355.9638671875,
|
||||
"logps/rejected": -392.5525207519531,
|
||||
"loss": 0.5753,
|
||||
"rewards/accuracies": 0.7437499761581421,
|
||||
"rewards/chosen": -0.8678590059280396,
|
||||
"rewards/margins": 0.6038464307785034,
|
||||
"rewards/rejected": -1.4717055559158325,
|
||||
"step": 230
|
||||
},
|
||||
{
|
||||
"epoch": 0.5023547880690737,
|
||||
"grad_norm": 18.458075241435953,
|
||||
"learning_rate": 2.910060778827554e-07,
|
||||
"logits/chosen": -0.6669884920120239,
|
||||
"logits/rejected": -0.5953234434127808,
|
||||
"logps/chosen": -363.3609313964844,
|
||||
"logps/rejected": -395.0166320800781,
|
||||
"loss": 0.5445,
|
||||
"rewards/accuracies": 0.7124999761581421,
|
||||
"rewards/chosen": -0.6804043054580688,
|
||||
"rewards/margins": 0.501660168170929,
|
||||
"rewards/rejected": -1.1820645332336426,
|
||||
"step": 240
|
||||
},
|
||||
{
|
||||
"epoch": 0.5232862375719518,
|
||||
"grad_norm": 24.001388696637466,
|
||||
"learning_rate": 2.7285261601056697e-07,
|
||||
"logits/chosen": -0.6821622252464294,
|
||||
"logits/rejected": -0.5736308693885803,
|
||||
"logps/chosen": -392.59375,
|
||||
"logps/rejected": -420.7662658691406,
|
||||
"loss": 0.5592,
|
||||
"rewards/accuracies": 0.75,
|
||||
"rewards/chosen": -1.037414789199829,
|
||||
"rewards/margins": 0.6777431964874268,
|
||||
"rewards/rejected": -1.7151581048965454,
|
||||
"step": 250
|
||||
},
|
||||
{
|
||||
"epoch": 0.54421768707483,
|
||||
"grad_norm": 20.80336944317341,
|
||||
"learning_rate": 2.5457665670441937e-07,
|
||||
"logits/chosen": -0.6666806936264038,
|
||||
"logits/rejected": -0.6545027494430542,
|
||||
"logps/chosen": -379.15423583984375,
|
||||
"logps/rejected": -414.10302734375,
|
||||
"loss": 0.549,
|
||||
"rewards/accuracies": 0.668749988079071,
|
||||
"rewards/chosen": -1.1116763353347778,
|
||||
"rewards/margins": 0.48681873083114624,
|
||||
"rewards/rejected": -1.5984950065612793,
|
||||
"step": 260
|
||||
},
|
||||
{
|
||||
"epoch": 0.565149136577708,
|
||||
"grad_norm": 18.999186941850777,
|
||||
"learning_rate": 2.3627616503391812e-07,
|
||||
"logits/chosen": -0.6578361988067627,
|
||||
"logits/rejected": -0.6166576147079468,
|
||||
"logps/chosen": -415.3619079589844,
|
||||
"logps/rejected": -453.8072814941406,
|
||||
"loss": 0.5596,
|
||||
"rewards/accuracies": 0.78125,
|
||||
"rewards/chosen": -1.0673983097076416,
|
||||
"rewards/margins": 0.6960801482200623,
|
||||
"rewards/rejected": -1.7634785175323486,
|
||||
"step": 270
|
||||
},
|
||||
{
|
||||
"epoch": 0.5860805860805861,
|
||||
"grad_norm": 25.011257430836675,
|
||||
"learning_rate": 2.1804923757009882e-07,
|
||||
"logits/chosen": -0.5656932592391968,
|
||||
"logits/rejected": -0.5232654809951782,
|
||||
"logps/chosen": -409.4795837402344,
|
||||
"logps/rejected": -441.3401794433594,
|
||||
"loss": 0.5636,
|
||||
"rewards/accuracies": 0.6875,
|
||||
"rewards/chosen": -1.4023996591567993,
|
||||
"rewards/margins": 0.5914163589477539,
|
||||
"rewards/rejected": -1.9938161373138428,
|
||||
"step": 280
|
||||
},
|
||||
{
|
||||
"epoch": 0.6070120355834642,
|
||||
"grad_norm": 16.649025738470474,
|
||||
"learning_rate": 1.9999357655598891e-07,
|
||||
"logits/chosen": -0.617931067943573,
|
||||
"logits/rejected": -0.5761314034461975,
|
||||
"logps/chosen": -406.4532775878906,
|
||||
"logps/rejected": -464.67822265625,
|
||||
"loss": 0.5591,
|
||||
"rewards/accuracies": 0.71875,
|
||||
"rewards/chosen": -1.4842880964279175,
|
||||
"rewards/margins": 0.5774600505828857,
|
||||
"rewards/rejected": -2.0617482662200928,
|
||||
"step": 290
|
||||
},
|
||||
{
|
||||
"epoch": 0.6279434850863422,
|
||||
"grad_norm": 16.730004932577106,
|
||||
"learning_rate": 1.8220596619089573e-07,
|
||||
"logits/chosen": -0.6562352180480957,
|
||||
"logits/rejected": -0.5790780186653137,
|
||||
"logps/chosen": -451.573486328125,
|
||||
"logps/rejected": -457.2960510253906,
|
||||
"loss": 0.5394,
|
||||
"rewards/accuracies": 0.668749988079071,
|
||||
"rewards/chosen": -1.1794800758361816,
|
||||
"rewards/margins": 0.5321540832519531,
|
||||
"rewards/rejected": -1.7116340398788452,
|
||||
"step": 300
|
||||
},
|
||||
{
|
||||
"epoch": 0.6488749345892203,
|
||||
"grad_norm": 20.824128290150536,
|
||||
"learning_rate": 1.647817538357072e-07,
|
||||
"logits/chosen": -0.6351410150527954,
|
||||
"logits/rejected": -0.5707007050514221,
|
||||
"logps/chosen": -443.58978271484375,
|
||||
"logps/rejected": -464.4762268066406,
|
||||
"loss": 0.5379,
|
||||
"rewards/accuracies": 0.737500011920929,
|
||||
"rewards/chosen": -1.2887599468231201,
|
||||
"rewards/margins": 0.7534160614013672,
|
||||
"rewards/rejected": -2.042175769805908,
|
||||
"step": 310
|
||||
},
|
||||
{
|
||||
"epoch": 0.6698063840920984,
|
||||
"grad_norm": 21.443811248225554,
|
||||
"learning_rate": 1.478143389201113e-07,
|
||||
"logits/chosen": -0.652029275894165,
|
||||
"logits/rejected": -0.5766469240188599,
|
||||
"logps/chosen": -430.428466796875,
|
||||
"logps/rejected": -456.797607421875,
|
||||
"loss": 0.5387,
|
||||
"rewards/accuracies": 0.7124999761581421,
|
||||
"rewards/chosen": -1.6468877792358398,
|
||||
"rewards/margins": 0.5889655351638794,
|
||||
"rewards/rejected": -2.235853433609009,
|
||||
"step": 320
|
||||
},
|
||||
{
|
||||
"epoch": 0.6907378335949764,
|
||||
"grad_norm": 18.054876727682743,
|
||||
"learning_rate": 1.3139467229135998e-07,
|
||||
"logits/chosen": -0.6749883890151978,
|
||||
"logits/rejected": -0.6647608876228333,
|
||||
"logps/chosen": -418.3038635253906,
|
||||
"logps/rejected": -472.7947692871094,
|
||||
"loss": 0.5367,
|
||||
"rewards/accuracies": 0.7124999761581421,
|
||||
"rewards/chosen": -1.3353520631790161,
|
||||
"rewards/margins": 0.6326448917388916,
|
||||
"rewards/rejected": -1.9679968357086182,
|
||||
"step": 330
|
||||
},
|
||||
{
|
||||
"epoch": 0.7116692830978545,
|
||||
"grad_norm": 26.52212132673544,
|
||||
"learning_rate": 1.1561076868822755e-07,
|
||||
"logits/chosen": -0.6122914552688599,
|
||||
"logits/rejected": -0.5800005793571472,
|
||||
"logps/chosen": -433.57635498046875,
|
||||
"logps/rejected": -465.54669189453125,
|
||||
"loss": 0.5428,
|
||||
"rewards/accuracies": 0.7124999761581421,
|
||||
"rewards/chosen": -1.294762134552002,
|
||||
"rewards/margins": 0.587591290473938,
|
||||
"rewards/rejected": -1.8823535442352295,
|
||||
"step": 340
|
||||
},
|
||||
{
|
||||
"epoch": 0.7326007326007326,
|
||||
"grad_norm": 19.655279922879483,
|
||||
"learning_rate": 1.0054723495346482e-07,
|
||||
"logits/chosen": -0.6827625036239624,
|
||||
"logits/rejected": -0.6138468980789185,
|
||||
"logps/chosen": -397.7406921386719,
|
||||
"logps/rejected": -434.5318298339844,
|
||||
"loss": 0.5167,
|
||||
"rewards/accuracies": 0.7124999761581421,
|
||||
"rewards/chosen": -1.2331862449645996,
|
||||
"rewards/margins": 0.6665691137313843,
|
||||
"rewards/rejected": -1.8997554779052734,
|
||||
"step": 350
|
||||
},
|
||||
{
|
||||
"epoch": 0.7535321821036107,
|
||||
"grad_norm": 19.235838308842027,
|
||||
"learning_rate": 8.628481651367875e-08,
|
||||
"logits/chosen": -0.6631180047988892,
|
||||
"logits/rejected": -0.5895651578903198,
|
||||
"logps/chosen": -461.24176025390625,
|
||||
"logps/rejected": -491.12176513671875,
|
||||
"loss": 0.5577,
|
||||
"rewards/accuracies": 0.699999988079071,
|
||||
"rewards/chosen": -1.4328138828277588,
|
||||
"rewards/margins": 0.6876929402351379,
|
||||
"rewards/rejected": -2.120506763458252,
|
||||
"step": 360
|
||||
},
|
||||
{
|
||||
"epoch": 0.7744636316064888,
|
||||
"grad_norm": 19.962323367834856,
|
||||
"learning_rate": 7.289996455765748e-08,
|
||||
"logits/chosen": -0.6887942552566528,
|
||||
"logits/rejected": -0.6230372190475464,
|
||||
"logps/chosen": -414.96270751953125,
|
||||
"logps/rejected": -449.39874267578125,
|
||||
"loss": 0.5411,
|
||||
"rewards/accuracies": 0.706250011920929,
|
||||
"rewards/chosen": -1.4094620943069458,
|
||||
"rewards/margins": 0.6524969935417175,
|
||||
"rewards/rejected": -2.0619590282440186,
|
||||
"step": 370
|
||||
},
|
||||
{
|
||||
"epoch": 0.7953950811093669,
|
||||
"grad_norm": 17.262620635801852,
|
||||
"learning_rate": 6.046442623320145e-08,
|
||||
"logits/chosen": -0.6020098924636841,
|
||||
"logits/rejected": -0.6000246405601501,
|
||||
"logps/chosen": -398.9418029785156,
|
||||
"logps/rejected": -501.1026916503906,
|
||||
"loss": 0.5387,
|
||||
"rewards/accuracies": 0.7437499761581421,
|
||||
"rewards/chosen": -1.440071940422058,
|
||||
"rewards/margins": 0.8659427762031555,
|
||||
"rewards/rejected": -2.3060147762298584,
|
||||
"step": 380
|
||||
},
|
||||
{
|
||||
"epoch": 0.8163265306122449,
|
||||
"grad_norm": 23.268294087473826,
|
||||
"learning_rate": 4.904486005914027e-08,
|
||||
"logits/chosen": -0.7086952328681946,
|
||||
"logits/rejected": -0.6402121782302856,
|
||||
"logps/chosen": -475.9898376464844,
|
||||
"logps/rejected": -513.5247802734375,
|
||||
"loss": 0.5207,
|
||||
"rewards/accuracies": 0.768750011920929,
|
||||
"rewards/chosen": -1.3465341329574585,
|
||||
"rewards/margins": 0.7135976552963257,
|
||||
"rewards/rejected": -2.060131788253784,
|
||||
"step": 390
|
||||
},
|
||||
{
|
||||
"epoch": 0.837257980115123,
|
||||
"grad_norm": 20.66365114424933,
|
||||
"learning_rate": 3.8702478614051345e-08,
|
||||
"logits/chosen": -0.641961932182312,
|
||||
"logits/rejected": -0.5901409983634949,
|
||||
"logps/chosen": -395.93157958984375,
|
||||
"logps/rejected": -442.00244140625,
|
||||
"loss": 0.5353,
|
||||
"rewards/accuracies": 0.71875,
|
||||
"rewards/chosen": -1.2355709075927734,
|
||||
"rewards/margins": 0.6957671642303467,
|
||||
"rewards/rejected": -1.9313379526138306,
|
||||
"step": 400
|
||||
},
|
||||
{
|
||||
"epoch": 0.858189429618001,
|
||||
"grad_norm": 18.94839439294029,
|
||||
"learning_rate": 2.9492720416985e-08,
|
||||
"logits/chosen": -0.7429651021957397,
|
||||
"logits/rejected": -0.6672912836074829,
|
||||
"logps/chosen": -440.3075256347656,
|
||||
"logps/rejected": -467.0520935058594,
|
||||
"loss": 0.5506,
|
||||
"rewards/accuracies": 0.731249988079071,
|
||||
"rewards/chosen": -1.2028748989105225,
|
||||
"rewards/margins": 0.7443949580192566,
|
||||
"rewards/rejected": -1.9472697973251343,
|
||||
"step": 410
|
||||
},
|
||||
{
|
||||
"epoch": 0.8791208791208791,
|
||||
"grad_norm": 19.442404647939284,
|
||||
"learning_rate": 2.1464952759020856e-08,
|
||||
"logits/chosen": -0.597920298576355,
|
||||
"logits/rejected": -0.586058497428894,
|
||||
"logps/chosen": -406.3937072753906,
|
||||
"logps/rejected": -479.6614685058594,
|
||||
"loss": 0.5384,
|
||||
"rewards/accuracies": 0.6875,
|
||||
"rewards/chosen": -1.3360610008239746,
|
||||
"rewards/margins": 0.6579158902168274,
|
||||
"rewards/rejected": -1.9939768314361572,
|
||||
"step": 420
|
||||
},
|
||||
{
|
||||
"epoch": 0.9000523286237572,
|
||||
"grad_norm": 19.433771609383193,
|
||||
"learning_rate": 1.4662207078575684e-08,
|
||||
"logits/chosen": -0.6416221857070923,
|
||||
"logits/rejected": -0.5748856663703918,
|
||||
"logps/chosen": -432.43292236328125,
|
||||
"logps/rejected": -473.4640197753906,
|
||||
"loss": 0.5268,
|
||||
"rewards/accuracies": 0.6937500238418579,
|
||||
"rewards/chosen": -1.289609670639038,
|
||||
"rewards/margins": 0.6828420162200928,
|
||||
"rewards/rejected": -1.9724515676498413,
|
||||
"step": 430
|
||||
},
|
||||
{
|
||||
"epoch": 0.9209837781266352,
|
||||
"grad_norm": 25.563733256044383,
|
||||
"learning_rate": 9.12094829893642e-09,
|
||||
"logits/chosen": -0.7145225405693054,
|
||||
"logits/rejected": -0.6558529138565063,
|
||||
"logps/chosen": -401.7915344238281,
|
||||
"logps/rejected": -416.718994140625,
|
||||
"loss": 0.5331,
|
||||
"rewards/accuracies": 0.699999988079071,
|
||||
"rewards/chosen": -1.2713123559951782,
|
||||
"rewards/margins": 0.5642818212509155,
|
||||
"rewards/rejected": -1.8355941772460938,
|
||||
"step": 440
|
||||
},
|
||||
{
|
||||
"epoch": 0.9419152276295133,
|
||||
"grad_norm": 18.949969564615213,
|
||||
"learning_rate": 4.8708793644441086e-09,
|
||||
"logits/chosen": -0.5803197622299194,
|
||||
"logits/rejected": -0.5434113144874573,
|
||||
"logps/chosen": -410.92730712890625,
|
||||
"logps/rejected": -468.50323486328125,
|
||||
"loss": 0.534,
|
||||
"rewards/accuracies": 0.7124999761581421,
|
||||
"rewards/chosen": -1.2884615659713745,
|
||||
"rewards/margins": 0.7078900337219238,
|
||||
"rewards/rejected": -1.9963515996932983,
|
||||
"step": 450
|
||||
},
|
||||
{
|
||||
"epoch": 0.9628466771323915,
|
||||
"grad_norm": 22.7412601308732,
|
||||
"learning_rate": 1.9347820230782295e-09,
|
||||
"logits/chosen": -0.6422279477119446,
|
||||
"logits/rejected": -0.5603567361831665,
|
||||
"logps/chosen": -407.45465087890625,
|
||||
"logps/rejected": -431.9442443847656,
|
||||
"loss": 0.5408,
|
||||
"rewards/accuracies": 0.7437499761581421,
|
||||
"rewards/chosen": -1.2375915050506592,
|
||||
"rewards/margins": 0.686954140663147,
|
||||
"rewards/rejected": -1.9245456457138062,
|
||||
"step": 460
|
||||
},
|
||||
{
|
||||
"epoch": 0.9837781266352695,
|
||||
"grad_norm": 17.505664359316036,
|
||||
"learning_rate": 3.2839470889836627e-10,
|
||||
"logits/chosen": -0.6509039998054504,
|
||||
"logits/rejected": -0.6031205654144287,
|
||||
"logps/chosen": -423.6900939941406,
|
||||
"logps/rejected": -475.7867736816406,
|
||||
"loss": 0.5134,
|
||||
"rewards/accuracies": 0.71875,
|
||||
"rewards/chosen": -1.1757431030273438,
|
||||
"rewards/margins": 0.6970219612121582,
|
||||
"rewards/rejected": -1.8727651834487915,
|
||||
"step": 470
|
||||
},
|
||||
{
|
||||
"epoch": 0.9984301412872841,
|
||||
"eval_logits/chosen": -0.5977884531021118,
|
||||
"eval_logits/rejected": -0.5622259378433228,
|
||||
"eval_logps/chosen": -401.6270751953125,
|
||||
"eval_logps/rejected": -481.71246337890625,
|
||||
"eval_loss": 0.5371974110603333,
|
||||
"eval_rewards/accuracies": 0.765625,
|
||||
"eval_rewards/chosen": -1.1911048889160156,
|
||||
"eval_rewards/margins": 0.8110275268554688,
|
||||
"eval_rewards/rejected": -2.0021324157714844,
|
||||
"eval_runtime": 196.6339,
|
||||
"eval_samples_per_second": 10.171,
|
||||
"eval_steps_per_second": 0.163,
|
||||
"step": 477
|
||||
},
|
||||
{
|
||||
"epoch": 0.9984301412872841,
|
||||
"step": 477,
|
||||
"total_flos": 0.0,
|
||||
"train_loss": 0.5847830807387954,
|
||||
"train_runtime": 56722.3251,
|
||||
"train_samples_per_second": 1.078,
|
||||
"train_steps_per_second": 0.008
|
||||
}
|
||||
],
|
||||
"logging_steps": 10,
|
||||
"max_steps": 477,
|
||||
"num_input_tokens_seen": 0,
|
||||
"num_train_epochs": 1,
|
||||
"save_steps": 500,
|
||||
"stateful_callbacks": {
|
||||
"TrainerControl": {
|
||||
"args": {
|
||||
"should_epoch_stop": false,
|
||||
"should_evaluate": false,
|
||||
"should_log": false,
|
||||
"should_save": true,
|
||||
"should_training_stop": true
|
||||
},
|
||||
"attributes": {}
|
||||
}
|
||||
},
|
||||
"total_flos": 0.0,
|
||||
"train_batch_size": 4,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:11fb779134c17cf577aa0327c379f1b7cd39e06e1953424a167de7aab30e8f84
|
||||
size 7672
|
||||
Reference in New Issue
Block a user