Initialize the project; model provided by the ModelHub XC community
Model: haoranxu/Llama-3-Instruct-8B-SimPO
Source: Original Platform
35  .gitattributes  vendored  Normal file
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
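The rules above route large binary artifacts (checkpoints, archives, tensors) through Git LFS so that only lightweight pointer files live in the Git history. Below is a minimal standard-library sketch of which files these patterns would capture; it is a rough approximation of gitattributes matching (real semantics differ in edge cases), and the file list is hypothetical.

```python
from fnmatch import fnmatch

# Subset of the patterns from the .gitattributes entries above.
lfs_patterns = ["*.safetensors", "*.bin", "*.pt", "*.pth", "*.zip", "*tfevents*"]

# Hypothetical working tree; in practice this would come from os.walk or `git ls-files`.
files = [
    "model-00001-of-00004.safetensors",
    "config.json",
    "training_args.bin",
    "runs/events.out.tfevents.1717000000",
]

for path in files:
    name = path.split("/")[-1]  # patterns without "/" match against the basename
    tracked = any(fnmatch(name, pat) for pat in lfs_patterns)
    print(f"{path}: {'LFS' if tracked else 'regular git'}")
```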
61  README.md  Normal file
@@ -0,0 +1,61 @@
---
license: llama3
base_model: meta-llama/Meta-Llama-3-8B-Instruct
tags:
- alignment-handbook
- generated_from_trainer
datasets:
- princeton-nlp/llama3-ultrafeedback
model-index:
- name: llama-3-8b-instruct-simpo
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# llama-3-8b-instruct-simpo

This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on the princeton-nlp/llama3-ultrafeedback dataset.

## Model description

More information needed

## Intended uses & limitations

More information needed

## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training:
- learning_rate: 1e-06
- train_batch_size: 2
- eval_batch_size: 4
- seed: 42
- distributed_type: multi-GPU
- num_devices: 16
- gradient_accumulation_steps: 8
- total_train_batch_size: 256
- total_eval_batch_size: 64
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: cosine
- lr_scheduler_warmup_ratio: 0.1
- num_epochs: 1

### Training results

### Framework versions

- Transformers 4.41.2
- Pytorch 2.3.1+rocm6.0
- Datasets 2.19.2
- Tokenizers 0.19.1
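The per-device batch size, device count, and gradient accumulation listed in the README compose into the reported total_train_batch_size; a quick arithmetic check using only the values shown above:

```python
# Values from the training hyperparameters in README.md.
train_batch_size = 2              # per device
num_devices = 16
gradient_accumulation_steps = 8

total_train_batch_size = train_batch_size * num_devices * gradient_accumulation_steps
assert total_train_batch_size == 256   # matches the reported total_train_batch_size
print(total_train_batch_size)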
22  all_results.json  Normal file
@@ -0,0 +1,22 @@
{
    "epoch": 1.0,
    "eval_logits/chosen": 0.19371525943279266,
    "eval_logits/rejected": 0.24551750719547272,
    "eval_logps/chosen": -0.5973814725875854,
    "eval_logps/rejected": -0.7979374527931213,
    "eval_loss": 1.46084463596344,
    "eval_rewards/accuracies": 0.6290322542190552,
    "eval_rewards/chosen": -1.4934533834457397,
    "eval_rewards/margins": 0.5013901591300964,
    "eval_rewards/rejected": -1.9948437213897705,
    "eval_runtime": 100.7391,
    "eval_samples": 1961,
    "eval_samples_per_second": 19.466,
    "eval_steps_per_second": 0.308,
    "total_flos": 0.0,
    "train_loss": 1.5168422256779468,
    "train_runtime": 9435.3453,
    "train_samples": 59876,
    "train_samples_per_second": 6.346,
    "train_steps_per_second": 0.025
}
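In these preference-optimization logs the reward margin is, as typically computed, the average gap between the reward assigned to the chosen response and the reward assigned to the rejected one. The figures in all_results.json are self-consistent up to aggregation/rounding noise, as a small check shows (values copied from the file above):

```python
import math

# Values from all_results.json above.
eval_rewards_chosen = -1.4934533834457397
eval_rewards_rejected = -1.9948437213897705
eval_rewards_margins = 0.5013901591300964

# Difference of the two means should track the reported mean margin closely.
assert math.isclose(eval_rewards_chosen - eval_rewards_rejected,
                    eval_rewards_margins, rel_tol=1e-4)
```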
29  config.json  Normal file
@@ -0,0 +1,29 @@
{
    "_name_or_path": "meta-llama/Meta-Llama-3-8B-Instruct",
    "architectures": [
        "LlamaForCausalLM"
    ],
    "attention_bias": false,
    "attention_dropout": 0.0,
    "bos_token_id": 128000,
    "eos_token_id": 128009,
    "hidden_act": "silu",
    "hidden_size": 4096,
    "initializer_range": 0.02,
    "intermediate_size": 14336,
    "max_position_embeddings": 8192,
    "mlp_bias": false,
    "model_type": "llama",
    "num_attention_heads": 32,
    "num_hidden_layers": 32,
    "num_key_value_heads": 8,
    "pretraining_tp": 1,
    "rms_norm_eps": 1e-05,
    "rope_scaling": null,
    "rope_theta": 500000.0,
    "tie_word_embeddings": false,
    "torch_dtype": "bfloat16",
    "transformers_version": "4.41.2",
    "use_cache": true,
    "vocab_size": 128256
}
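config.json pins the usual Llama-3-8B shape: 32 layers, hidden size 4096, and 32 attention heads sharing 8 key/value heads (grouped-query attention). A small sketch of the per-head dimensions implied by these fields:

```python
# Fields as they appear in config.json above.
cfg = {
    "hidden_size": 4096,
    "num_attention_heads": 32,
    "num_key_value_heads": 8,
    "num_hidden_layers": 32,
    "vocab_size": 128256,
}

head_dim = cfg["hidden_size"] // cfg["num_attention_heads"]            # 128
kv_groups = cfg["num_attention_heads"] // cfg["num_key_value_heads"]   # 4 query heads per KV head
print(f"head_dim={head_dim}, query heads per KV head={kv_groups}")
```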
16  eval_results.json  Normal file
@@ -0,0 +1,16 @@
{
    "epoch": 1.0,
    "eval_logits/chosen": 0.19371525943279266,
    "eval_logits/rejected": 0.24551750719547272,
    "eval_logps/chosen": -0.5973814725875854,
    "eval_logps/rejected": -0.7979374527931213,
    "eval_loss": 1.46084463596344,
    "eval_rewards/accuracies": 0.6290322542190552,
    "eval_rewards/chosen": -1.4934533834457397,
    "eval_rewards/margins": 0.5013901591300964,
    "eval_rewards/rejected": -1.9948437213897705,
    "eval_runtime": 100.7391,
    "eval_samples": 1961,
    "eval_samples_per_second": 19.466,
    "eval_steps_per_second": 0.308
}
12  generation_config.json  Normal file
@@ -0,0 +1,12 @@
{
    "bos_token_id": 128000,
    "do_sample": true,
    "eos_token_id": [
        128001,
        128009
    ],
    "max_length": 4096,
    "temperature": 0.6,
    "top_p": 0.9,
    "transformers_version": "4.41.2"
}
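generation_config.json enables sampling with temperature 0.6 and top_p 0.9 and stops on either end token (128001 or 128009, i.e. <|eot_id|>). A hedged sketch of loading the checkpoint with Transformers and generating with those defaults; the model path, bfloat16, and device_map settings are assumptions, not part of this repo:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_path = "haoranxu/Llama-3-Instruct-8B-SimPO"  # or a local clone of this repository

tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.bfloat16, device_map="auto"
)

messages = [{"role": "user", "content": "Give me a one-sentence summary of SimPO."}]
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# Sampling parameters mirror generation_config.json (do_sample, temperature, top_p).
outputs = model.generate(inputs, max_new_tokens=256, do_sample=True,
                         temperature=0.6, top_p=0.9)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))
```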
3  model-00001-of-00004.safetensors  Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:0ffb6a0d60ad57f0acf3fb3758544885cbf37bf53d780c0b7c40d6542f8bbf06
size 4976698672
3  model-00002-of-00004.safetensors  Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:76ebe53122e0baf52553818a65ebcc84854e9de3f5a9a50c4d96db54c77a1bf9
size 4999802720
3  model-00003-of-00004.safetensors  Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:75fd8710f9a0e09d0d74e01fe4738e0300888e2e59090ec86b32c0121e1c2c04
size 4915916176
3  model-00004-of-00004.safetensors  Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:09c1e2e9c4fe4fb6daa1abbecb6b54e562d17cd78aa77bb6dea2db558b2e5d19
size 1168138808
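The four shard entries above are Git LFS pointer stubs (a version line, a sha256 oid, and a byte size) rather than the roughly 16 GB of weights themselves. A minimal sketch of parsing such a pointer file, assuming a local clone where the pointers have not yet been smudged into the real files:

```python
def parse_lfs_pointer(path: str) -> dict:
    """Parse a Git LFS pointer file into its key/value fields."""
    fields = {}
    with open(path, "r", encoding="utf-8") as fh:
        for line in fh:
            key, _, value = line.strip().partition(" ")
            if key:
                fields[key] = value
    return fields

# Hypothetical usage on one of the shards listed above.
ptr = parse_lfs_pointer("model-00001-of-00004.safetensors")
print(ptr["oid"], int(ptr["size"]))   # sha256:0ffb6a0d... 4976698672
```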
298  model.safetensors.index.json  Normal file
@@ -0,0 +1,298 @@
|
||||
{
|
||||
"metadata": {
|
||||
"total_size": 16060522496
|
||||
},
|
||||
"weight_map": {
|
||||
"lm_head.weight": "model-00004-of-00004.safetensors",
|
||||
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.30.input_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.31.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
|
||||
"model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
|
||||
"model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
|
||||
"model.norm.weight": "model-00004-of-00004.safetensors"
|
||||
}
|
||||
}
|
||||
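model.safetensors.index.json maps every tensor name to the shard that stores it (total_size 16060522496 bytes of tensor data spread over the four files above). A small sketch of grouping tensors by shard or locating a single weight, assuming a local copy of the index:

```python
import json
from collections import Counter

with open("model.safetensors.index.json", "r", encoding="utf-8") as fh:
    index = json.load(fh)

weight_map = index["weight_map"]

# Which shard holds a particular tensor?
print(weight_map["model.layers.0.self_attn.q_proj.weight"])  # model-00001-of-00004.safetensors

# How many tensors live in each shard?
print(Counter(weight_map.values()))
```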
17  special_tokens_map.json  Normal file
@@ -0,0 +1,17 @@
{
    "bos_token": {
        "content": "<|begin_of_text|>",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "eos_token": {
        "content": "<|eot_id|>",
        "lstrip": false,
        "normalized": false,
        "rstrip": false,
        "single_word": false
    },
    "pad_token": "<|eot_id|>"
}
410563  tokenizer.json  Normal file
File diff suppressed because it is too large
2063  tokenizer_config.json  Normal file
File diff suppressed because it is too large
9  train_results.json  Normal file
@@ -0,0 +1,9 @@
{
    "epoch": 1.0,
    "total_flos": 0.0,
    "train_loss": 1.5168422256779468,
    "train_runtime": 9435.3453,
    "train_samples": 59876,
    "train_samples_per_second": 6.346,
    "train_steps_per_second": 0.025
}
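train_results.json reports 59,876 training samples for one epoch; with the effective batch size of 256 from the README this works out to roughly 234 optimizer steps, which is exactly the global_step recorded in trainer_state.json below:

```python
import math

train_samples = 59876           # from train_results.json
total_train_batch_size = 256    # from README.md

steps_per_epoch = math.ceil(train_samples / total_train_batch_size)
print(steps_per_epoch)          # 234, matching trainer_state.json's global_step
```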
747  trainer_state.json  Normal file
@@ -0,0 +1,747 @@
|
||||
{
|
||||
"best_metric": null,
|
||||
"best_model_checkpoint": null,
|
||||
"epoch": 1.0,
|
||||
"eval_steps": 400,
|
||||
"global_step": 234,
|
||||
"is_hyper_param_search": false,
|
||||
"is_local_process_zero": true,
|
||||
"is_world_process_zero": true,
|
||||
"log_history": [
|
||||
{
|
||||
"epoch": 0.004273504273504274,
|
||||
"grad_norm": 13.34490065414622,
|
||||
"learning_rate": 4.166666666666666e-08,
|
||||
"logits/chosen": -0.0994097888469696,
|
||||
"logits/rejected": -0.05551636964082718,
|
||||
"logps/chosen": -0.3415659964084625,
|
||||
"logps/rejected": -0.47305911779403687,
|
||||
"loss": 1.6161,
|
||||
"rewards/accuracies": 0.625,
|
||||
"rewards/chosen": -0.8539150357246399,
|
||||
"rewards/margins": 0.32873278856277466,
|
||||
"rewards/rejected": -1.1826478242874146,
|
||||
"step": 1
|
||||
},
|
||||
{
|
||||
"epoch": 0.021367521367521368,
|
||||
"grad_norm": 13.589601768721613,
|
||||
"learning_rate": 2.0833333333333333e-07,
|
||||
"logits/chosen": -0.1462363749742508,
|
||||
"logits/rejected": -0.14229728281497955,
|
||||
"logps/chosen": -0.4933474361896515,
|
||||
"logps/rejected": -0.5222159624099731,
|
||||
"loss": 1.6602,
|
||||
"rewards/accuracies": 0.5,
|
||||
"rewards/chosen": -1.2333686351776123,
|
||||
"rewards/margins": 0.07217135280370712,
|
||||
"rewards/rejected": -1.3055399656295776,
|
||||
"step": 5
|
||||
},
|
||||
{
|
||||
"epoch": 0.042735042735042736,
|
||||
"grad_norm": 8.87565340921273,
|
||||
"learning_rate": 4.1666666666666667e-07,
|
||||
"logits/chosen": -0.06335792690515518,
|
||||
"logits/rejected": -0.02190782129764557,
|
||||
"logps/chosen": -0.42108288407325745,
|
||||
"logps/rejected": -0.4607582688331604,
|
||||
"loss": 1.6154,
|
||||
"rewards/accuracies": 0.6000000238418579,
|
||||
"rewards/chosen": -1.052707314491272,
|
||||
"rewards/margins": 0.09918837249279022,
|
||||
"rewards/rejected": -1.151895523071289,
|
||||
"step": 10
|
||||
},
|
||||
{
|
||||
"epoch": 0.0641025641025641,
|
||||
"grad_norm": 8.660979149745721,
|
||||
"learning_rate": 6.249999999999999e-07,
|
||||
"logits/chosen": -0.03177808225154877,
|
||||
"logits/rejected": -0.046066295355558395,
|
||||
"logps/chosen": -0.4381338953971863,
|
||||
"logps/rejected": -0.4405650198459625,
|
||||
"loss": 1.6068,
|
||||
"rewards/accuracies": 0.48750001192092896,
|
||||
"rewards/chosen": -1.095334768295288,
|
||||
"rewards/margins": 0.006077909376472235,
|
||||
"rewards/rejected": -1.1014125347137451,
|
||||
"step": 15
|
||||
},
|
||||
{
|
||||
"epoch": 0.08547008547008547,
|
||||
"grad_norm": 11.952248546593465,
|
||||
"learning_rate": 8.333333333333333e-07,
|
||||
"logits/chosen": -0.15588010847568512,
|
||||
"logits/rejected": -0.1342618316411972,
|
||||
"logps/chosen": -0.4571867883205414,
|
||||
"logps/rejected": -0.4777159094810486,
|
||||
"loss": 1.614,
|
||||
"rewards/accuracies": 0.5874999761581421,
|
||||
"rewards/chosen": -1.1429669857025146,
|
||||
"rewards/margins": 0.0513228178024292,
|
||||
"rewards/rejected": -1.1942898035049438,
|
||||
"step": 20
|
||||
},
|
||||
{
|
||||
"epoch": 0.10683760683760683,
|
||||
"grad_norm": 7.754135247761182,
|
||||
"learning_rate": 9.999440509051367e-07,
|
||||
"logits/chosen": -0.11927574872970581,
|
||||
"logits/rejected": -0.09945164620876312,
|
||||
"logps/chosen": -0.4351424276828766,
|
||||
"logps/rejected": -0.4545895457267761,
|
||||
"loss": 1.578,
|
||||
"rewards/accuracies": 0.4749999940395355,
|
||||
"rewards/chosen": -1.0878560543060303,
|
||||
"rewards/margins": 0.048617832362651825,
|
||||
"rewards/rejected": -1.1364738941192627,
|
||||
"step": 25
|
||||
},
|
||||
{
|
||||
"epoch": 0.1282051282051282,
|
||||
"grad_norm": 7.538242001814111,
|
||||
"learning_rate": 9.979871469976195e-07,
|
||||
"logits/chosen": -0.10869207233190536,
|
||||
"logits/rejected": -0.10381748527288437,
|
||||
"logps/chosen": -0.34100422263145447,
|
||||
"logps/rejected": -0.3819103240966797,
|
||||
"loss": 1.5753,
|
||||
"rewards/accuracies": 0.637499988079071,
|
||||
"rewards/chosen": -0.8525105714797974,
|
||||
"rewards/margins": 0.10226528346538544,
|
||||
"rewards/rejected": -0.9547758102416992,
|
||||
"step": 30
|
||||
},
|
||||
{
|
||||
"epoch": 0.14957264957264957,
|
||||
"grad_norm": 12.195052142813049,
|
||||
"learning_rate": 9.932452969617607e-07,
|
||||
"logits/chosen": -0.1797598898410797,
|
||||
"logits/rejected": -0.08878383785486221,
|
||||
"logps/chosen": -0.39731845259666443,
|
||||
"logps/rejected": -0.4476427435874939,
|
||||
"loss": 1.5794,
|
||||
"rewards/accuracies": 0.4625000059604645,
|
||||
"rewards/chosen": -0.9932962656021118,
|
||||
"rewards/margins": 0.12581071257591248,
|
||||
"rewards/rejected": -1.1191068887710571,
|
||||
"step": 35
|
||||
},
|
||||
{
|
||||
"epoch": 0.17094017094017094,
|
||||
"grad_norm": 9.951467872149747,
|
||||
"learning_rate": 9.857450191464337e-07,
|
||||
"logits/chosen": -0.1144944578409195,
|
||||
"logits/rejected": -0.09615515172481537,
|
||||
"logps/chosen": -0.4008924067020416,
|
||||
"logps/rejected": -0.4632578492164612,
|
||||
"loss": 1.5425,
|
||||
"rewards/accuracies": 0.5625,
|
||||
"rewards/chosen": -1.0022308826446533,
|
||||
"rewards/margins": 0.1559135764837265,
|
||||
"rewards/rejected": -1.158144474029541,
|
||||
"step": 40
|
||||
},
|
||||
{
|
||||
"epoch": 0.19230769230769232,
|
||||
"grad_norm": 9.233295836736795,
|
||||
"learning_rate": 9.755282581475767e-07,
|
||||
"logits/chosen": -0.13462205231189728,
|
||||
"logits/rejected": -0.11864318698644638,
|
||||
"logps/chosen": -0.38421568274497986,
|
||||
"logps/rejected": -0.4823899269104004,
|
||||
"loss": 1.563,
|
||||
"rewards/accuracies": 0.4625000059604645,
|
||||
"rewards/chosen": -0.9605391621589661,
|
||||
"rewards/margins": 0.24543562531471252,
|
||||
"rewards/rejected": -1.205974817276001,
|
||||
"step": 45
|
||||
},
|
||||
{
|
||||
"epoch": 0.21367521367521367,
|
||||
"grad_norm": 8.910807590586288,
|
||||
"learning_rate": 9.626521502369983e-07,
|
||||
"logits/chosen": -0.19716130197048187,
|
||||
"logits/rejected": -0.15895314514636993,
|
||||
"logps/chosen": -0.37459030747413635,
|
||||
"logps/rejected": -0.4011107385158539,
|
||||
"loss": 1.5687,
|
||||
"rewards/accuracies": 0.5874999761581421,
|
||||
"rewards/chosen": -0.9364757537841797,
|
||||
"rewards/margins": 0.06630120426416397,
|
||||
"rewards/rejected": -1.002776861190796,
|
||||
"step": 50
|
||||
},
|
||||
{
|
||||
"epoch": 0.23504273504273504,
|
||||
"grad_norm": 8.741144982685217,
|
||||
"learning_rate": 9.471887038331684e-07,
|
||||
"logits/chosen": -0.07297103852033615,
|
||||
"logits/rejected": -0.045126646757125854,
|
||||
"logps/chosen": -0.3764435052871704,
|
||||
"logps/rejected": -0.4278687834739685,
|
||||
"loss": 1.5595,
|
||||
"rewards/accuracies": 0.612500011920929,
|
||||
"rewards/chosen": -0.9411088228225708,
|
||||
"rewards/margins": 0.12856322526931763,
|
||||
"rewards/rejected": -1.0696719884872437,
|
||||
"step": 55
|
||||
},
|
||||
{
|
||||
"epoch": 0.2564102564102564,
|
||||
"grad_norm": 9.65813041875177,
|
||||
"learning_rate": 9.29224396800933e-07,
|
||||
"logits/chosen": -0.00632941210642457,
|
||||
"logits/rejected": -0.04972558468580246,
|
||||
"logps/chosen": -0.3871188163757324,
|
||||
"logps/rejected": -0.42081761360168457,
|
||||
"loss": 1.5433,
|
||||
"rewards/accuracies": 0.637499988079071,
|
||||
"rewards/chosen": -0.9677971005439758,
|
||||
"rewards/margins": 0.08424701541662216,
|
||||
"rewards/rejected": -1.052044153213501,
|
||||
"step": 60
|
||||
},
|
||||
{
|
||||
"epoch": 0.2777777777777778,
|
||||
"grad_norm": 9.22020584708003,
|
||||
"learning_rate": 9.088596928322157e-07,
|
||||
"logits/chosen": 0.052268654108047485,
|
||||
"logits/rejected": 0.07172085344791412,
|
||||
"logps/chosen": -0.5376943945884705,
|
||||
"logps/rejected": -0.6326268911361694,
|
||||
"loss": 1.5646,
|
||||
"rewards/accuracies": 0.612500011920929,
|
||||
"rewards/chosen": -1.3442360162734985,
|
||||
"rewards/margins": 0.23733112215995789,
|
||||
"rewards/rejected": -1.5815670490264893,
|
||||
"step": 65
|
||||
},
|
||||
{
|
||||
"epoch": 0.29914529914529914,
|
||||
"grad_norm": 13.413709386986495,
|
||||
"learning_rate": 8.862084796122997e-07,
|
||||
"logits/chosen": -0.014689329080283642,
|
||||
"logits/rejected": 0.023155853152275085,
|
||||
"logps/chosen": -0.4990696310997009,
|
||||
"logps/rejected": -0.5445122718811035,
|
||||
"loss": 1.5438,
|
||||
"rewards/accuracies": 0.5,
|
||||
"rewards/chosen": -1.2476739883422852,
|
||||
"rewards/margins": 0.11360664665699005,
|
||||
"rewards/rejected": -1.3612806797027588,
|
||||
"step": 70
|
||||
},
|
||||
{
|
||||
"epoch": 0.32051282051282054,
|
||||
"grad_norm": 11.427386040917398,
|
||||
"learning_rate": 8.613974319136957e-07,
|
||||
"logits/chosen": -0.13034021854400635,
|
||||
"logits/rejected": -0.12927956879138947,
|
||||
"logps/chosen": -0.41988080739974976,
|
||||
"logps/rejected": -0.5410174131393433,
|
||||
"loss": 1.5502,
|
||||
"rewards/accuracies": 0.5874999761581421,
|
||||
"rewards/chosen": -1.0497019290924072,
|
||||
"rewards/margins": 0.3028416335582733,
|
||||
"rewards/rejected": -1.352543592453003,
|
||||
"step": 75
|
||||
},
|
||||
{
|
||||
"epoch": 0.3418803418803419,
|
||||
"grad_norm": 8.049393765401897,
|
||||
"learning_rate": 8.34565303179429e-07,
|
||||
"logits/chosen": -0.028346195816993713,
|
||||
"logits/rejected": -0.03619036823511124,
|
||||
"logps/chosen": -0.45006662607192993,
|
||||
"logps/rejected": -0.5116376876831055,
|
||||
"loss": 1.5312,
|
||||
"rewards/accuracies": 0.550000011920929,
|
||||
"rewards/chosen": -1.125166416168213,
|
||||
"rewards/margins": 0.15392769873142242,
|
||||
"rewards/rejected": -1.2790940999984741,
|
||||
"step": 80
|
||||
},
|
||||
{
|
||||
"epoch": 0.36324786324786323,
|
||||
"grad_norm": 10.027579862082366,
|
||||
"learning_rate": 8.058621495575031e-07,
|
||||
"logits/chosen": -0.14632529020309448,
|
||||
"logits/rejected": -0.09482773393392563,
|
||||
"logps/chosen": -0.4204605221748352,
|
||||
"logps/rejected": -0.5197293758392334,
|
||||
"loss": 1.5235,
|
||||
"rewards/accuracies": 0.550000011920929,
|
||||
"rewards/chosen": -1.051151156425476,
|
||||
"rewards/margins": 0.24817219376564026,
|
||||
"rewards/rejected": -1.299323320388794,
|
||||
"step": 85
|
||||
},
|
||||
{
|
||||
"epoch": 0.38461538461538464,
|
||||
"grad_norm": 11.778153989300435,
|
||||
"learning_rate": 7.754484907260512e-07,
|
||||
"logits/chosen": -0.03493024781346321,
|
||||
"logits/rejected": -0.005648002028465271,
|
||||
"logps/chosen": -0.48695850372314453,
|
||||
"logps/rejected": -0.6510589718818665,
|
||||
"loss": 1.4999,
|
||||
"rewards/accuracies": 0.574999988079071,
|
||||
"rewards/chosen": -1.2173962593078613,
|
||||
"rewards/margins": 0.4102511405944824,
|
||||
"rewards/rejected": -1.6276471614837646,
|
||||
"step": 90
|
||||
},
|
||||
{
|
||||
"epoch": 0.405982905982906,
|
||||
"grad_norm": 9.302069462672513,
|
||||
"learning_rate": 7.434944122021836e-07,
|
||||
"logits/chosen": -0.03053552471101284,
|
||||
"logits/rejected": 0.008784343488514423,
|
||||
"logps/chosen": -0.48170527815818787,
|
||||
"logps/rejected": -0.7114425897598267,
|
||||
"loss": 1.4935,
|
||||
"rewards/accuracies": 0.625,
|
||||
"rewards/chosen": -1.2042630910873413,
|
||||
"rewards/margins": 0.5743432641029358,
|
||||
"rewards/rejected": -1.7786064147949219,
|
||||
"step": 95
|
||||
},
|
||||
{
|
||||
"epoch": 0.42735042735042733,
|
||||
"grad_norm": 15.155125489577449,
|
||||
"learning_rate": 7.101786141547828e-07,
|
||||
"logits/chosen": -0.04211338609457016,
|
||||
"logits/rejected": -0.033037807792425156,
|
||||
"logps/chosen": -0.5078177452087402,
|
||||
"logps/rejected": -0.5776672959327698,
|
||||
"loss": 1.515,
|
||||
"rewards/accuracies": 0.5625,
|
||||
"rewards/chosen": -1.2695444822311401,
|
||||
"rewards/margins": 0.17462393641471863,
|
||||
"rewards/rejected": -1.4441683292388916,
|
||||
"step": 100
|
||||
},
|
||||
{
|
||||
"epoch": 0.44871794871794873,
|
||||
"grad_norm": 16.110970936557738,
|
||||
"learning_rate": 6.756874120406714e-07,
|
||||
"logits/chosen": -0.09111454337835312,
|
||||
"logits/rejected": -0.05689894035458565,
|
||||
"logps/chosen": -0.53609699010849,
|
||||
"logps/rejected": -0.6224747896194458,
|
||||
"loss": 1.5115,
|
||||
"rewards/accuracies": 0.512499988079071,
|
||||
"rewards/chosen": -1.3402423858642578,
|
||||
"rewards/margins": 0.21594436466693878,
|
||||
"rewards/rejected": -1.5561867952346802,
|
||||
"step": 105
|
||||
},
|
||||
{
|
||||
"epoch": 0.4700854700854701,
|
||||
"grad_norm": 15.393371096801042,
|
||||
"learning_rate": 6.402136946530014e-07,
|
||||
"logits/chosen": -0.03849278762936592,
|
||||
"logits/rejected": 0.008776647970080376,
|
||||
"logps/chosen": -0.6499117016792297,
|
||||
"logps/rejected": -0.800051212310791,
|
||||
"loss": 1.5033,
|
||||
"rewards/accuracies": 0.6499999761581421,
|
||||
"rewards/chosen": -1.6247793436050415,
|
||||
"rewards/margins": 0.3753485679626465,
|
||||
"rewards/rejected": -2.0001277923583984,
|
||||
"step": 110
|
||||
},
|
||||
{
|
||||
"epoch": 0.49145299145299143,
|
||||
"grad_norm": 9.808767922675623,
|
||||
"learning_rate": 6.039558454088795e-07,
|
||||
"logits/chosen": -0.03172523155808449,
|
||||
"logits/rejected": -0.04225381836295128,
|
||||
"logps/chosen": -0.5940315127372742,
|
||||
"logps/rejected": -0.7107864618301392,
|
||||
"loss": 1.5234,
|
||||
"rewards/accuracies": 0.574999988079071,
|
||||
"rewards/chosen": -1.4850788116455078,
|
||||
"rewards/margins": 0.29188722372055054,
|
||||
"rewards/rejected": -1.7769660949707031,
|
||||
"step": 115
|
||||
},
|
||||
{
|
||||
"epoch": 0.5128205128205128,
|
||||
"grad_norm": 11.007927364769593,
|
||||
"learning_rate": 5.671166329088277e-07,
|
||||
"logits/chosen": -0.15589627623558044,
|
||||
"logits/rejected": -0.14328333735466003,
|
||||
"logps/chosen": -0.5461645126342773,
|
||||
"logps/rejected": -0.6048396825790405,
|
||||
"loss": 1.4873,
|
||||
"rewards/accuracies": 0.550000011920929,
|
||||
"rewards/chosen": -1.3654112815856934,
|
||||
"rewards/margins": 0.14668798446655273,
|
||||
"rewards/rejected": -1.512099027633667,
|
||||
"step": 120
|
||||
},
|
||||
{
|
||||
"epoch": 0.5341880341880342,
|
||||
"grad_norm": 13.40581329613937,
|
||||
"learning_rate": 5.299020769725171e-07,
|
||||
"logits/chosen": -0.1017662063241005,
|
||||
"logits/rejected": -0.05507459491491318,
|
||||
"logps/chosen": -0.5289615392684937,
|
||||
"logps/rejected": -0.6923493146896362,
|
||||
"loss": 1.5008,
|
||||
"rewards/accuracies": 0.6499999761581421,
|
||||
"rewards/chosen": -1.322403907775879,
|
||||
"rewards/margins": 0.4084695875644684,
|
||||
"rewards/rejected": -1.730873465538025,
|
||||
"step": 125
|
||||
},
|
||||
{
|
||||
"epoch": 0.5555555555555556,
|
||||
"grad_norm": 10.393815491734465,
|
||||
"learning_rate": 4.925202964923683e-07,
|
||||
"logits/chosen": -0.01592491939663887,
|
||||
"logits/rejected": 0.10016925632953644,
|
||||
"logps/chosen": -0.545333981513977,
|
||||
"logps/rejected": -0.6946025490760803,
|
||||
"loss": 1.4829,
|
||||
"rewards/accuracies": 0.6875,
|
||||
"rewards/chosen": -1.3633348941802979,
|
||||
"rewards/margins": 0.3731713891029358,
|
||||
"rewards/rejected": -1.7365062236785889,
|
||||
"step": 130
|
||||
},
|
||||
{
|
||||
"epoch": 0.5769230769230769,
|
||||
"grad_norm": 12.796199500434565,
|
||||
"learning_rate": 4.5518034554828327e-07,
|
||||
"logits/chosen": -0.15349504351615906,
|
||||
"logits/rejected": -0.0872931182384491,
|
||||
"logps/chosen": -0.5930777788162231,
|
||||
"logps/rejected": -0.7436810731887817,
|
||||
"loss": 1.5284,
|
||||
"rewards/accuracies": 0.550000011920929,
|
||||
"rewards/chosen": -1.4826946258544922,
|
||||
"rewards/margins": 0.3765079379081726,
|
||||
"rewards/rejected": -1.8592026233673096,
|
||||
"step": 135
|
||||
},
|
||||
{
|
||||
"epoch": 0.5982905982905983,
|
||||
"grad_norm": 12.725430343830277,
|
||||
"learning_rate": 4.180910442924311e-07,
|
||||
"logits/chosen": -0.07290254533290863,
|
||||
"logits/rejected": -0.041871629655361176,
|
||||
"logps/chosen": -0.6430836915969849,
|
||||
"logps/rejected": -0.8283632397651672,
|
||||
"loss": 1.4923,
|
||||
"rewards/accuracies": 0.550000011920929,
|
||||
"rewards/chosen": -1.607709288597107,
|
||||
"rewards/margins": 0.4631989598274231,
|
||||
"rewards/rejected": -2.070908308029175,
|
||||
"step": 140
|
||||
},
|
||||
{
|
||||
"epoch": 0.6196581196581197,
|
||||
"grad_norm": 11.626811297979486,
|
||||
"learning_rate": 3.814598111422513e-07,
|
||||
"logits/chosen": -0.11089099943637848,
|
||||
"logits/rejected": -0.08382478356361389,
|
||||
"logps/chosen": -0.6920641660690308,
|
||||
"logps/rejected": -0.7230005264282227,
|
||||
"loss": 1.5009,
|
||||
"rewards/accuracies": 0.512499988079071,
|
||||
"rewards/chosen": -1.7301604747772217,
|
||||
"rewards/margins": 0.07734106481075287,
|
||||
"rewards/rejected": -1.8075014352798462,
|
||||
"step": 145
|
||||
},
|
||||
{
|
||||
"epoch": 0.6410256410256411,
|
||||
"grad_norm": 10.775765508549604,
|
||||
"learning_rate": 3.454915028125263e-07,
|
||||
"logits/chosen": -0.045754365622997284,
|
||||
"logits/rejected": -0.01993427611887455,
|
||||
"logps/chosen": -0.6500915288925171,
|
||||
"logps/rejected": -0.7409160733222961,
|
||||
"loss": 1.4998,
|
||||
"rewards/accuracies": 0.5874999761581421,
|
||||
"rewards/chosen": -1.6252286434173584,
|
||||
"rewards/margins": 0.22706131637096405,
|
||||
"rewards/rejected": -1.8522899150848389,
|
||||
"step": 150
|
||||
},
|
||||
{
|
||||
"epoch": 0.6623931623931624,
|
||||
"grad_norm": 14.344149642351613,
|
||||
"learning_rate": 3.1038726867353583e-07,
|
||||
"logits/chosen": -0.06552598625421524,
|
||||
"logits/rejected": -0.047451216727495193,
|
||||
"logps/chosen": -0.5619519948959351,
|
||||
"logps/rejected": -0.7450312972068787,
|
||||
"loss": 1.5001,
|
||||
"rewards/accuracies": 0.6000000238418579,
|
||||
"rewards/chosen": -1.4048798084259033,
|
||||
"rewards/margins": 0.4576982855796814,
|
||||
"rewards/rejected": -1.8625783920288086,
|
||||
"step": 155
|
||||
},
|
||||
{
|
||||
"epoch": 0.6837606837606838,
|
||||
"grad_norm": 15.714808233921314,
|
||||
"learning_rate": 2.763434258421836e-07,
|
||||
"logits/chosen": -0.033187855035066605,
|
||||
"logits/rejected": 0.04786054790019989,
|
||||
"logps/chosen": -0.5716092586517334,
|
||||
"logps/rejected": -0.7472088932991028,
|
||||
"loss": 1.4866,
|
||||
"rewards/accuracies": 0.625,
|
||||
"rewards/chosen": -1.4290231466293335,
|
||||
"rewards/margins": 0.4389989972114563,
|
||||
"rewards/rejected": -1.8680222034454346,
|
||||
"step": 160
|
||||
},
|
||||
{
|
||||
"epoch": 0.7051282051282052,
|
||||
"grad_norm": 12.685566519530033,
|
||||
"learning_rate": 2.4355036129704696e-07,
|
||||
"logits/chosen": 0.05670114606618881,
|
||||
"logits/rejected": 0.08839456737041473,
|
||||
"logps/chosen": -0.5581383109092712,
|
||||
"logps/rejected": -0.7727221250534058,
|
||||
"loss": 1.4701,
|
||||
"rewards/accuracies": 0.6625000238418579,
|
||||
"rewards/chosen": -1.395345687866211,
|
||||
"rewards/margins": 0.5364596247673035,
|
||||
"rewards/rejected": -1.9318053722381592,
|
||||
"step": 165
|
||||
},
|
||||
{
|
||||
"epoch": 0.7264957264957265,
|
||||
"grad_norm": 15.872452807830859,
|
||||
"learning_rate": 2.121914671571633e-07,
|
||||
"logits/chosen": 0.03589984029531479,
|
||||
"logits/rejected": 0.07213737815618515,
|
||||
"logps/chosen": -0.5885176658630371,
|
||||
"logps/rejected": -0.7095993757247925,
|
||||
"loss": 1.4749,
|
||||
"rewards/accuracies": 0.625,
|
||||
"rewards/chosen": -1.4712941646575928,
|
||||
"rewards/margins": 0.3027040958404541,
|
||||
"rewards/rejected": -1.773998498916626,
|
||||
"step": 170
|
||||
},
|
||||
{
|
||||
"epoch": 0.7478632478632479,
|
||||
"grad_norm": 10.782809870588457,
|
||||
"learning_rate": 1.824421150789106e-07,
|
||||
"logits/chosen": 0.08967064321041107,
|
||||
"logits/rejected": 0.07906897366046906,
|
||||
"logps/chosen": -0.7075825929641724,
|
||||
"logps/rejected": -0.8367185592651367,
|
||||
"loss": 1.4649,
|
||||
"rewards/accuracies": 0.5874999761581421,
|
||||
"rewards/chosen": -1.7689564228057861,
|
||||
"rewards/margins": 0.32283997535705566,
|
||||
"rewards/rejected": -2.091796398162842,
|
||||
"step": 175
|
||||
},
|
||||
{
|
||||
"epoch": 0.7692307692307693,
|
||||
"grad_norm": 19.45895258920461,
|
||||
"learning_rate": 1.5446867550656767e-07,
|
||||
"logits/chosen": 0.09190338104963303,
|
||||
"logits/rejected": 0.11659294366836548,
|
||||
"logps/chosen": -0.6805117130279541,
|
||||
"logps/rejected": -0.8247642517089844,
|
||||
"loss": 1.472,
|
||||
"rewards/accuracies": 0.5375000238418579,
|
||||
"rewards/chosen": -1.7012790441513062,
|
||||
"rewards/margins": 0.3606313467025757,
|
||||
"rewards/rejected": -2.061910629272461,
|
||||
"step": 180
|
||||
},
|
||||
{
|
||||
"epoch": 0.7905982905982906,
|
||||
"grad_norm": 12.591384251278834,
|
||||
"learning_rate": 1.284275872613028e-07,
|
||||
"logits/chosen": 0.09435538947582245,
|
||||
"logits/rejected": 0.0583331473171711,
|
||||
"logps/chosen": -0.6257596015930176,
|
||||
"logps/rejected": -0.7002542018890381,
|
||||
"loss": 1.4664,
|
||||
"rewards/accuracies": 0.612500011920929,
|
||||
"rewards/chosen": -1.5643991231918335,
|
||||
"rewards/margins": 0.18623651564121246,
|
||||
"rewards/rejected": -1.7506357431411743,
|
||||
"step": 185
|
||||
},
|
||||
{
|
||||
"epoch": 0.811965811965812,
|
||||
"grad_norm": 11.80568242770119,
|
||||
"learning_rate": 1.044644826718295e-07,
|
||||
"logits/chosen": 0.08234812319278717,
|
||||
"logits/rejected": 0.08470721542835236,
|
||||
"logps/chosen": -0.6872076988220215,
|
||||
"logps/rejected": -0.8852859735488892,
|
||||
"loss": 1.4845,
|
||||
"rewards/accuracies": 0.5874999761581421,
|
||||
"rewards/chosen": -1.7180191278457642,
|
||||
"rewards/margins": 0.4951957166194916,
|
||||
"rewards/rejected": -2.213214635848999,
|
||||
"step": 190
|
||||
},
|
||||
{
|
||||
"epoch": 0.8333333333333334,
|
||||
"grad_norm": 10.82000720224868,
|
||||
"learning_rate": 8.271337313934867e-08,
|
||||
"logits/chosen": 0.09420228004455566,
|
||||
"logits/rejected": 0.07983645796775818,
|
||||
"logps/chosen": -0.6325824856758118,
|
||||
"logps/rejected": -0.799116313457489,
|
||||
"loss": 1.4378,
|
||||
"rewards/accuracies": 0.612500011920929,
|
||||
"rewards/chosen": -1.5814563035964966,
|
||||
"rewards/margins": 0.41633448004722595,
|
||||
"rewards/rejected": -1.9977906942367554,
|
||||
"step": 195
|
||||
},
|
||||
{
|
||||
"epoch": 0.8547008547008547,
|
||||
"grad_norm": 10.37262835016834,
|
||||
"learning_rate": 6.329589969143517e-08,
|
||||
"logits/chosen": -0.02971937693655491,
|
||||
"logits/rejected": -0.009283095598220825,
|
||||
"logps/chosen": -0.6813124418258667,
|
||||
"logps/rejected": -0.7548044323921204,
|
||||
"loss": 1.4691,
|
||||
"rewards/accuracies": 0.574999988079071,
|
||||
"rewards/chosen": -1.703281044960022,
|
||||
"rewards/margins": 0.18373003602027893,
|
||||
"rewards/rejected": -1.887010931968689,
|
||||
"step": 200
|
||||
},
|
||||
{
|
||||
"epoch": 0.8760683760683761,
|
||||
"grad_norm": 16.81161049195176,
|
||||
"learning_rate": 4.6320652716067555e-08,
|
||||
"logits/chosen": 0.057117123156785965,
|
||||
"logits/rejected": 0.033341288566589355,
|
||||
"logps/chosen": -0.5831505656242371,
|
||||
"logps/rejected": -0.7539544701576233,
|
||||
"loss": 1.4888,
|
||||
"rewards/accuracies": 0.675000011920929,
|
||||
"rewards/chosen": -1.457876443862915,
|
||||
"rewards/margins": 0.4270097315311432,
|
||||
"rewards/rejected": -1.8848861455917358,
|
||||
"step": 205
|
||||
},
|
||||
{
|
||||
"epoch": 0.8974358974358975,
|
||||
"grad_norm": 10.666184736871127,
|
||||
"learning_rate": 3.188256468013139e-08,
|
||||
"logits/chosen": 0.05842505767941475,
|
||||
"logits/rejected": 0.09373210370540619,
|
||||
"logps/chosen": -0.6092408299446106,
|
||||
"logps/rejected": -0.7288607954978943,
|
||||
"loss": 1.4636,
|
||||
"rewards/accuracies": 0.6625000238418579,
|
||||
"rewards/chosen": -1.523101806640625,
|
||||
"rewards/margins": 0.2990500330924988,
|
||||
"rewards/rejected": -1.8221518993377686,
|
||||
"step": 210
|
||||
},
|
||||
{
|
||||
"epoch": 0.9188034188034188,
|
||||
"grad_norm": 13.64833883057287,
|
||||
"learning_rate": 2.0062379228555525e-08,
|
||||
"logits/chosen": -0.03809903562068939,
|
||||
"logits/rejected": -0.026596253737807274,
|
||||
"logps/chosen": -0.6271125078201294,
|
||||
"logps/rejected": -0.6994240880012512,
|
||||
"loss": 1.4417,
|
||||
"rewards/accuracies": 0.637499988079071,
|
||||
"rewards/chosen": -1.5677810907363892,
|
||||
"rewards/margins": 0.18077896535396576,
|
||||
"rewards/rejected": -1.7485601902008057,
|
||||
"step": 215
|
||||
},
|
||||
{
|
||||
"epoch": 0.9401709401709402,
|
||||
"grad_norm": 11.81936042932564,
|
||||
"learning_rate": 1.0926199633097154e-08,
|
||||
"logits/chosen": 0.06771688163280487,
|
||||
"logits/rejected": 0.06887342035770416,
|
||||
"logps/chosen": -0.6525846123695374,
|
||||
"logps/rejected": -0.7262920141220093,
|
||||
"loss": 1.4542,
|
||||
"rewards/accuracies": 0.625,
|
||||
"rewards/chosen": -1.631461501121521,
|
||||
"rewards/margins": 0.18426847457885742,
|
||||
"rewards/rejected": -1.815730094909668,
|
||||
"step": 220
|
||||
},
|
||||
{
|
||||
"epoch": 0.9615384615384616,
|
||||
"grad_norm": 13.440051766867484,
|
||||
"learning_rate": 4.5251191160326495e-09,
|
||||
"logits/chosen": -0.03224249184131622,
|
||||
"logits/rejected": 0.010183418169617653,
|
||||
"logps/chosen": -0.6054807901382446,
|
||||
"logps/rejected": -0.7111561894416809,
|
||||
"loss": 1.4562,
|
||||
"rewards/accuracies": 0.637499988079071,
|
||||
"rewards/chosen": -1.513702154159546,
|
||||
"rewards/margins": 0.26418834924697876,
|
||||
"rewards/rejected": -1.7778904438018799,
|
||||
"step": 225
|
||||
},
|
||||
{
|
||||
"epoch": 0.9829059829059829,
|
||||
"grad_norm": 11.45030232204693,
|
||||
"learning_rate": 8.949351161324225e-10,
|
||||
"logits/chosen": 0.15793287754058838,
|
||||
"logits/rejected": 0.19319342076778412,
|
||||
"logps/chosen": -0.8483369946479797,
|
||||
"logps/rejected": -0.9106278419494629,
|
||||
"loss": 1.4913,
|
||||
"rewards/accuracies": 0.5249999761581421,
|
||||
"rewards/chosen": -2.120842456817627,
|
||||
"rewards/margins": 0.1557270735502243,
|
||||
"rewards/rejected": -2.276569366455078,
|
||||
"step": 230
|
||||
},
|
||||
{
|
||||
"epoch": 1.0,
|
||||
"step": 234,
|
||||
"total_flos": 0.0,
|
||||
"train_loss": 1.5168422256779468,
|
||||
"train_runtime": 9435.3453,
|
||||
"train_samples_per_second": 6.346,
|
||||
"train_steps_per_second": 0.025
|
||||
}
|
||||
],
|
||||
"logging_steps": 5,
|
||||
"max_steps": 234,
|
||||
"num_input_tokens_seen": 0,
|
||||
"num_train_epochs": 1,
|
||||
"save_steps": 1000000,
|
||||
"stateful_callbacks": {
|
||||
"TrainerControl": {
|
||||
"args": {
|
||||
"should_epoch_stop": false,
|
||||
"should_evaluate": false,
|
||||
"should_log": false,
|
||||
"should_save": false,
|
||||
"should_training_stop": false
|
||||
},
|
||||
"attributes": {}
|
||||
}
|
||||
},
|
||||
"total_flos": 0.0,
|
||||
"train_batch_size": 2,
|
||||
"trial_name": null,
|
||||
"trial_params": null
|
||||
}
|
||||
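trainer_state.json keeps the full training history: loss, gradient norm, and reward statistics logged every 5 steps. A brief sketch of pulling the loss and reward-margin curves out of log_history for inspection, assuming a local copy of the file:

```python
import json

with open("trainer_state.json", "r", encoding="utf-8") as fh:
    state = json.load(fh)

# Keep only the periodic training entries (the final summary entry has no "loss" key).
history = [e for e in state["log_history"] if "loss" in e and "step" in e]

steps   = [e["step"] for e in history]
loss    = [e["loss"] for e in history]
margins = [e.get("rewards/margins", float("nan")) for e in history]

print(f"{len(history)} log entries, final loss {loss[-1]:.4f}, final margin {margins[-1]:.4f}")
```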
3  training_args.bin  Normal file
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9ba2bf1744ecdb04513952d6ff836423e485b4afd619fc842854e79fdcf76ea4
size 6456