初始化项目，由ModelHub XC社区提供模型

Model: AI-ModelScope/zephyr-7b-gemma-v0.1 Source: Original Platform
2026-05-29 16:56:13 +08:00
commit 4313d9a5be
21 changed files with 1103 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,36 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,386 @@
 ---
 license: other
 tags:
 - alignment-handbook
 - trl
 - dpo
 - generated_from_trainer
 base_model: HuggingFaceH4/zephyr-7b-gemma-sft-v0.1
 datasets:
 - argilla/dpo-mix-7k
 license_name: gemma-terms-of-use
 license_link: https://ai.google.dev/gemma/terms
 pipeline_tag: text-generation
 model-index:
 - name: zephyr-7b-gemma
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MT-Bench
      type: unknown
    metrics:
    - type: unknown
      value: 7.81
      name: score
    source:
      url: https://huggingface.co/spaces/lmsys/mt-bench
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: AI2 Reasoning Challenge (25-Shot)
      type: ai2_arc
      config: ARC-Challenge
      split: test
      args:
        num_few_shot: 25
    metrics:
    - type: acc_norm
      value: 58.45
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-gemma-v0.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: HellaSwag (10-Shot)
      type: hellaswag
      split: validation
      args:
        num_few_shot: 10
    metrics:
    - type: acc_norm
      value: 83.48
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-gemma-v0.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU (5-Shot)
      type: cais/mmlu
      config: all
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 60.68
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-gemma-v0.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: TruthfulQA (0-shot)
      type: truthful_qa
      config: multiple_choice
      split: validation
      args:
        num_few_shot: 0
    metrics:
    - type: mc2
      value: 52.07
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-gemma-v0.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Winogrande (5-shot)
      type: winogrande
      config: winogrande_xl
      split: validation
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 74.19
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-gemma-v0.1
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GSM8k (5-shot)
      type: gsm8k
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 45.56
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=HuggingFaceH4/zephyr-7b-gemma-v0.1
      name: Open LLM Leaderboard
 ---
 <img src="https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1/resolve/main/thumbnail.png" alt="Zephyr 7B Gemma Logo" width="800" style="margin-left: auto; margin-right: auto; display: block;"/>
 # Model Card for Zephyr 7B Gemma
 Zephyr is a series of language models that are trained to act as helpful assistants. Zephyr 7B Gemma is the third model in the series, and is a fine-tuned version of [`google/gemma-7b`](https://huggingface.co/google/gemma-7b) that was trained on a mix of publicly available, synthetic datasets using Direct Preference Optimization (DPO). You can reproduce the training of this model via the recipe provided in the [Alignment Handbook](https://github.com/huggingface/alignment-handbook).
 ## Model description
 - **Model type:** A 7B parameter GPT-like model fine-tuned on a mix of publicly available, synthetic datasets.
 - **Language(s) (NLP):** Primarily English
 - **License:** Gemma Terms of Use
 - **Finetuned from model:** [google/gemma-7b](https://huggingface.co/google/gemma-7b)
 ### Model Sources
 <!-- Provide the basic links for the model. -->
 - **Repository:** https://github.com/huggingface/alignment-handbook
 - **Demo:** https://huggingface.co/spaces/HuggingFaceH4/zephyr-7b-gemma-chat
 ## Performance
 |                                 Model                                 |MT Bench⬇️|IFEval|
 |-----------------------------------------------------------------------|------:|------:|
 |[zephyr-7b-gemma-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1)|  7.81 |  28.76|
 |[zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)  |  7.34 |  43.81|
 |[google/gemma-7b-it](https://huggingface.co/google/gemma-7b-it)               |  6.38 |  38.01|
 |                                 Model                                 |AGIEval|GPT4All|TruthfulQA|BigBench|Average ⬇️|
 |-----------------------------------------------------------------------|------:|------:|---------:|-------:|------:|
 |[zephyr-7b-beta](https://huggingface.co/HuggingFaceH4/zephyr-7b-beta)  |  37.52|  71.77|     55.26|   39.77|  51.08|
 |[zephyr-7b-gemma-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1)|  34.22|  66.37|     52.19|   37.10|  47.47|
 |[mlabonne/Gemmalpaca-7B](https://huggingface.co/mlabonne/Gemmalpaca-7B)|  21.6 |  40.87|     44.85 |   30.49|  34.45|
 |[google/gemma-7b-it](https://huggingface.co/google/gemma-7b-it)        |  21.33|  40.84|     41.70|   30.25|  33.53|
 <details><summary>Details of AGIEval, GPT4All, TruthfulQA, BigBench </summary>
 ### AGIEval
 |             Task             |Version| Metric |Value|   |Stderr|
 |------------------------------|------:|--------|----:|---|-----:|
 |agieval_aqua_rat              |      0|acc     |21.65|±  |  2.59|
 |                              |       |acc_norm|25.20|±  |  2.73|
 |agieval_logiqa_en             |      0|acc     |34.72|±  |  1.87|
 |                              |       |acc_norm|35.94|±  |  1.88|
 |agieval_lsat_ar               |      0|acc     |19.57|±  |  2.62|
 |                              |       |acc_norm|21.74|±  |  2.73|
 |agieval_lsat_lr               |      0|acc     |30.59|±  |  2.04|
 |                              |       |acc_norm|32.55|±  |  2.08|
 |agieval_lsat_rc               |      0|acc     |49.07|±  |  3.05|
 |                              |       |acc_norm|42.75|±  |  3.02|
 |agieval_sat_en                |      0|acc     |54.85|±  |  3.48|
 |                              |       |acc_norm|53.40|±  |  3.48|
 |agieval_sat_en_without_passage|      0|acc     |37.38|±  |  3.38|
 |                              |       |acc_norm|33.98|±  |  3.31|
 |agieval_sat_math              |      0|acc     |30.91|±  |  3.12|
 |                              |       |acc_norm|28.18|±  |  3.04|
 Average: 34.22%
 ### GPT4All
 |    Task     |Version| Metric |Value|   |Stderr|
 |-------------|------:|--------|----:|---|-----:|
 |arc_challenge|      0|acc     |49.15|±  |  1.46|
 |             |       |acc_norm|52.47|±  |  1.46|
 |arc_easy     |      0|acc     |77.44|±  |  0.86|
 |             |       |acc_norm|74.75|±  |  0.89|
 |boolq        |      1|acc     |79.69|±  |  0.70|
 |hellaswag    |      0|acc     |60.59|±  |  0.49|
 |             |       |acc_norm|78.00|±  |  0.41|
 |openbookqa   |      0|acc     |29.20|±  |  2.04|
 |             |       |acc_norm|37.80|±  |  2.17|
 |piqa         |      0|acc     |76.82|±  |  0.98|
 |             |       |acc_norm|77.80|±  |  0.97|
 |winogrande   |      0|acc     |64.09|±  |  1.35|
 Average: 66.37%
 ### TruthfulQA
 |    Task     |Version|Metric|Value|   |Stderr|
 |-------------|------:|------|----:|---|-----:|
 |truthfulqa_mc|      1|mc1   |35.74|±  |  1.68|
 |             |       |mc2   |52.19|±  |  1.59|
 Average: 52.19%
 ### Bigbench
 |                      Task                      |Version|       Metric        |Value|   |Stderr|
 |------------------------------------------------|------:|---------------------|----:|---|-----:|
 |bigbench_causal_judgement                       |      0|multiple_choice_grade|53.68|±  |  3.63|
 |bigbench_date_understanding                     |      0|multiple_choice_grade|59.89|±  |  2.55|
 |bigbench_disambiguation_qa                      |      0|multiple_choice_grade|30.23|±  |  2.86|
 |bigbench_geometric_shapes                       |      0|multiple_choice_grade|11.42|±  |  1.68|
 |                                                |       |exact_str_match      | 0.00|±  |  0.00|
 |bigbench_logical_deduction_five_objects         |      0|multiple_choice_grade|28.40|±  |  2.02|
 |bigbench_logical_deduction_seven_objects        |      0|multiple_choice_grade|19.14|±  |  1.49|
 |bigbench_logical_deduction_three_objects        |      0|multiple_choice_grade|44.67|±  |  2.88|
 |bigbench_movie_recommendation                   |      0|multiple_choice_grade|26.80|±  |  1.98|
 |bigbench_navigate                               |      0|multiple_choice_grade|50.00|±  |  1.58|
 |bigbench_reasoning_about_colored_objects        |      0|multiple_choice_grade|52.75|±  |  1.12|
 |bigbench_ruin_names                             |      0|multiple_choice_grade|33.04|±  |  2.22|
 |bigbench_salient_translation_error_detection    |      0|multiple_choice_grade|33.37|±  |  1.49|
 |bigbench_snarks                                 |      0|multiple_choice_grade|48.62|±  |  3.73|
 |bigbench_sports_understanding                   |      0|multiple_choice_grade|58.11|±  |  1.57|
 |bigbench_temporal_sequences                     |      0|multiple_choice_grade|37.20|±  |  1.53|
 |bigbench_tracking_shuffled_objects_five_objects |      0|multiple_choice_grade|20.08|±  |  1.13|
 |bigbench_tracking_shuffled_objects_seven_objects|      0|multiple_choice_grade|15.77|±  |  0.87|
 |bigbench_tracking_shuffled_objects_three_objects|      0|multiple_choice_grade|44.67|±  |  2.88|
 Average: 37.10%
 </details>
 ## Intended uses & limitations
 The model was initially fine-tuned on the [DEITA 10K](https://huggingface.co/datasets/HuggingFaceH4/deita-10k-v0-sft)  dataset, which contains a diverse range of synthetic dialogues generated by ChatGPT. 
 We then further aligned the model with [🤗 TRL's](https://github.com/huggingface/trl) `DPOTrainer` on the [argilla/dpo-mix-7k](https://huggingface.co/datasets/argilla/dpo-mix-7k) dataset, which contains 7k prompts and model completions that are ranked by GPT-4. As a result, the model can be used for chat and you can check out our [demo](https://huggingface.co/spaces/HuggingFaceH4/zephyr-chat) to test its capabilities. 
 Here's how you can run the model using the `pipeline()` function from 🤗 Transformers:
 ```python
 # pip install transformers>=4.38.2
 # pip install accelerate
 import torch
 from transformers import pipeline
 pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-gemma-v0.1",
    device_map="auto",
    torch_dtype=torch.bfloat16,
 )
 messages = [
    {
        "role": "system",
        "content": "",  # Model is not yet trained to follow system prompts
    },
    {"role": "user", "content": "How many helicopters can a human eat in one sitting?"},
 ]
 outputs = pipe(
    messages,
    max_new_tokens=128,
    do_sample=True,
    temperature=0.7,
    top_k=50,
    top_p=0.95,
    stop_sequence="<|im_end|>",
 )
 print(outputs[0]["generated_text"][-1]["content"])
 # It is not possible for a human to eat a helicopter in one sitting, as a
 # helicopter is a large and inedible machine. Helicopters are made of metal,
 # plastic, and other materials that are not meant to be consumed by humans.
 # Eating a helicopter would be extremely dangerous and would likely cause
 # serious health problems, including choking, suffocation, and poisoning. It is
 # important to only eat food that is safe and intended for human consumption.
 ```
 ## Bias, Risks, and Limitations
 <!-- This section is meant to convey both technical and sociotechnical limitations. -->
 Zephyr 7B Gemma has not been aligned to human preferences for safety within the RLHF phase or deployed with in-the-loop filtering of responses like ChatGPT, so the model can produce problematic outputs (especially when prompted to do so). The size and composition of the corpus used to train the base model (`google/gemma-7b`) are also unknown; however, it is likely to have included a mix of Web data and technical sources like books and code. See the [StarCoder2 model card](https://huggingface.co/bigcode/starcoder2-15b) for an example of this.
 ## Training and evaluation data
 This model is a fine-tuned version of [HuggingFaceH4/zephyr-7b-gemma-sft-v0.1](https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-sft-v0.1) on the argilla/dpo-mix-7k dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.4695
 - Rewards/chosen: -3.3746
 - Rewards/rejected: -4.9715
 - Rewards/accuracies: 0.7188
 - Rewards/margins: 1.5970
 - Logps/rejected: -459.4853
 - Logps/chosen: -429.9115
 - Logits/rejected: 86.4684
 - Logits/chosen: 92.8200
 ### Training hyperparameters
 The following hyperparameters were used during training:
 - learning_rate: 5e-07
 - train_batch_size: 2
 - eval_batch_size: 4
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 8
 - gradient_accumulation_steps: 8
 - total_train_batch_size: 128
 - total_eval_batch_size: 32
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
 - num_epochs: 2
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
 |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
 | 0.1923        | 1.9   | 100  | 0.4736          | -3.4575        | -4.9556          | 0.75               | 1.4980          | -459.1662      | -431.5707    | 86.3863         | 92.7360       |
 ### Framework versions
 - Transformers 4.39.0.dev0
 - Pytorch 2.1.2+cu121
 - Datasets 2.14.6
 - Tokenizers 0.15.1
 ## Citation Information
 If you find this model useful in your work, please consider citing the Zephyr technical report:
 ```
@misc{tunstall2023zephyr,
      title={Zephyr: Direct Distillation of LM Alignment}, 
      author={Lewis Tunstall and Edward Beeching and Nathan Lambert and Nazneen Rajani and Kashif Rasul and Younes Belkada and Shengyi Huang and Leandro von Werra and Clémentine Fourrier and Nathan Habib and Nathan Sarrazin and Omar Sanseviero and Alexander M. Rush and Thomas Wolf},
      year={2023},
      eprint={2310.16944},
      archivePrefix={arXiv},
      primaryClass={cs.LG}
 }
 ```
 You may also wish to cite the creators of this model:
 ```
@misc{zephyr_7b_gemma,
  author = {Lewis Tunstall and Philipp Schmid},
  title = {Zephyr 7B Gemma},
  year = {2024},
  publisher = {Hugging Face},
  journal = {Hugging Face repository},
  howpublished = {\url{https://huggingface.co/HuggingFaceH4/zephyr-7b-gemma-v0.1}}
 }
 ```
 # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
 Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_HuggingFaceH4__zephyr-7b-gemma-v0.1)
 |             Metric              |Value|
 |---------------------------------|----:|
 |Avg.                             |62.41|
 |AI2 Reasoning Challenge (25-Shot)|58.45|
 |HellaSwag (10-Shot)              |83.48|
 |MMLU (5-Shot)                    |60.68|
 |TruthfulQA (0-shot)              |52.07|
 |Winogrande (5-shot)              |74.19|
 |GSM8k (5-shot)                   |45.56|
--- a/all_results.json
+++ b/all_results.json
@@ -0,0 +1,21 @@
 {
    "epoch": 1.97,
    "eval_logits/chosen": 92.81997680664062,
    "eval_logits/rejected": 86.46841430664062,
    "eval_logps/chosen": -429.9114685058594,
    "eval_logps/rejected": -459.4852600097656,
    "eval_loss": 0.4695254862308502,
    "eval_rewards/accuracies": 0.71875,
    "eval_rewards/chosen": -3.3745555877685547,
    "eval_rewards/margins": 1.5969535112380981,
    "eval_rewards/rejected": -4.9715094566345215,
    "eval_runtime": 52.4051,
    "eval_samples": 750,
    "eval_samples_per_second": 14.312,
    "eval_steps_per_second": 0.458,
    "train_loss": 0.38887147261546207,
    "train_runtime": 1183.8142,
    "train_samples": 6750,
    "train_samples_per_second": 11.404,
    "train_steps_per_second": 0.088
 }
--- a/config.json
+++ b/config.json
@@ -0,0 +1,28 @@
 {
  "_name_or_path": "lewtun/zephyr-7b-gemma-sft",
  "architectures": [
    "GemmaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 2,
  "eos_token_id": 1,
  "head_dim": 256,
  "hidden_act": "gelu",
  "hidden_size": 3072,
  "initializer_range": 0.02,
  "intermediate_size": 24576,
  "max_position_embeddings": 8192,
  "model_type": "gemma",
  "num_attention_heads": 16,
  "num_hidden_layers": 28,
  "num_key_value_heads": 16,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.39.0.dev0",
  "use_cache": true,
  "vocab_size": 256000
 }
--- a/configuration.json
+++ b/configuration.json
@@ -0,0 +1 @@
 {"framework": "pytorch", "task": "text-generation", "allow_remote": true}
--- a/eval_results.json
+++ b/eval_results.json
@@ -0,0 +1,16 @@
 {
    "epoch": 1.97,
    "eval_logits/chosen": 92.81997680664062,
    "eval_logits/rejected": 86.46841430664062,
    "eval_logps/chosen": -429.9114685058594,
    "eval_logps/rejected": -459.4852600097656,
    "eval_loss": 0.4695254862308502,
    "eval_rewards/accuracies": 0.71875,
    "eval_rewards/chosen": -3.3745555877685547,
    "eval_rewards/margins": 1.5969535112380981,
    "eval_rewards/rejected": -4.9715094566345215,
    "eval_runtime": 52.4051,
    "eval_samples": 750,
    "eval_samples_per_second": 14.312,
    "eval_steps_per_second": 0.458
 }
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,7 @@
 {
  "_from_model_config": true,
  "bos_token_id": 2,
  "eos_token_id": 1,
  "pad_token_id": 0,
  "transformers_version": "4.39.0.dev0"
 }
--- a/model-00001-of-00004.safetensors
+++ b/model-00001-of-00004.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:47f0626a622dd18f71418e3681623f56f2a8fc13ac958699e6bd4f96f573b3d3
 size 4995496656
--- a/model-00002-of-00004.safetensors
+++ b/model-00002-of-00004.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:c5f35edf44fe0ca2af76f44deea436a0af070519bfc3b62568322b746e402e42
 size 4982953168
--- a/model-00003-of-00004.safetensors
+++ b/model-00003-of-00004.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:0cf70aee6d272e72089ffe79d0e95fe6b2911271094fd1045fde46d9a85d03f3
 size 4982953200
--- a/model-00004-of-00004.safetensors
+++ b/model-00004-of-00004.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:a44c6a49480428c9b23c7dbe1a78bb506ae0c44687442d62f88282308792472a
 size 2113988336
--- a/model.safetensors.index.json
+++ b/model.safetensors.index.json
@@ -0,0 +1,261 @@
 {
  "metadata": {
    "total_size": 17075361792
  },
  "weight_map": {
    "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.15.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.15.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.16.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.16.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.16.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.17.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.18.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.input_layernorm.weight": "model-00004-of-00004.safetensors",
    "model.layers.24.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.24.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.24.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.24.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
    "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
    "model.layers.25.input_layernorm.weight": "model-00004-of-00004.safetensors",
    "model.layers.25.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.25.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.25.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.25.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
    "model.layers.25.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.25.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.25.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.25.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.26.input_layernorm.weight": "model-00004-of-00004.safetensors",
    "model.layers.26.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.26.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.26.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
    "model.layers.26.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.26.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.26.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.26.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.27.input_layernorm.weight": "model-00004-of-00004.safetensors",
    "model.layers.27.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.27.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.27.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.27.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
    "model.layers.27.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.27.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.27.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.27.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
    "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.6.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.6.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
    "model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
    "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
    "model.norm.weight": "model-00004-of-00004.safetensors"
  }
 }
--- a/runs/Mar01_11-14-37_ip-26-0-170-132/events.out.tfevents.1709291765.ip-26-0-170-132.2208298.0
+++ b/runs/Mar01_11-14-37_ip-26-0-170-132/events.out.tfevents.1709291765.ip-26-0-170-132.2208298.0
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:d53b43fb646020d14dd8d6b911f800a4da1e70553f3faf621520dc8fa9ee73fe
 size 13304
--- a/runs/Mar01_11-14-37_ip-26-0-170-132/events.out.tfevents.1709293097.ip-26-0-170-132.2208298.1
+++ b/runs/Mar01_11-14-37_ip-26-0-170-132/events.out.tfevents.1709293097.ip-26-0-170-132.2208298.1
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:056a7f6db597040a25a4449e5cc73eb025c0f7c3a5b436f829fbf4df0000983a
 size 815
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,34 @@
 {
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>"
  ],
  "bos_token": {
    "content": "<bos>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<eos>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/thumbnail.png
+++ b/thumbnail.png
--- a/tokenizer.json
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:22449cb9ef4bad0db7dd93b46ddff7ab7d6a654dd4f903e130ddb6361eac3af5
 size 17477473
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,70 @@
 {
  "add_bos_token": false,
  "add_eos_token": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<eos>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "<bos>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "106": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "107": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>"
  ],
  "bos_token": "<bos>",
  "chat_template": "{% if messages[0]['role'] == 'user' or messages[0]['role'] == 'system' %}{{ bos_token }}{% endif %}{% for message in messages %}{{ '<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n' }}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% elif messages[-1]['role'] == 'assistant' %}{{ eos_token }}{% endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<eos>",
  "legacy": null,
  "model_max_length": 2048,
  "pad_token": "<pad>",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "tokenizer_class": "GemmaTokenizer",
  "unk_token": "<unk>",
  "use_default_system_prompt": false
 }
--- a/train_results.json
+++ b/train_results.json
@@ -0,0 +1,8 @@
 {
    "epoch": 1.97,
    "train_loss": 0.38887147261546207,
    "train_runtime": 1183.8142,
    "train_samples": 6750,
    "train_samples_per_second": 11.404,
    "train_steps_per_second": 0.088
 }
--- a/trainer_state.json
+++ b/trainer_state.json
@@ -0,0 +1,211 @@
 {
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.971563981042654,
  "eval_steps": 100,
  "global_step": 104,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "grad_norm": 139.638709617328,
      "learning_rate": 4.545454545454545e-08,
      "logits/chosen": 111.16130065917969,
      "logits/rejected": 86.8372802734375,
      "logps/chosen": -326.8536071777344,
      "logps/rejected": -329.15960693359375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.19,
      "grad_norm": 141.5345140695996,
      "learning_rate": 4.545454545454545e-07,
      "logits/chosen": 110.37065124511719,
      "logits/rejected": 133.2639923095703,
      "logps/chosen": -350.3541259765625,
      "logps/rejected": -434.3558349609375,
      "loss": 0.7191,
      "rewards/accuracies": 0.4722222089767456,
      "rewards/chosen": 0.13274627923965454,
      "rewards/margins": 0.07573667168617249,
      "rewards/rejected": 0.05700961872935295,
      "step": 10
    },
    {
      "epoch": 0.38,
      "grad_norm": 123.71909837085582,
      "learning_rate": 4.885348141000122e-07,
      "logits/chosen": 117.74342346191406,
      "logits/rejected": 128.52548217773438,
      "logps/chosen": -333.21240234375,
      "logps/rejected": -410.2923889160156,
      "loss": 0.6097,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": 0.11470325291156769,
      "rewards/margins": 0.7479402422904968,
      "rewards/rejected": -0.6332370042800903,
      "step": 20
    },
    {
      "epoch": 0.57,
      "grad_norm": 111.89651526533274,
      "learning_rate": 4.5025027361734613e-07,
      "logits/chosen": 114.44095611572266,
      "logits/rejected": 119.11683654785156,
      "logps/chosen": -399.1412048339844,
      "logps/rejected": -474.2645568847656,
      "loss": 0.596,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.7276217937469482,
      "rewards/margins": 1.0803521871566772,
      "rewards/rejected": -2.807974100112915,
      "step": 30
    },
    {
      "epoch": 0.76,
      "grad_norm": 102.67088507130228,
      "learning_rate": 3.893311157806091e-07,
      "logits/chosen": 116.33101654052734,
      "logits/rejected": 111.0595703125,
      "logps/chosen": -428.7275390625,
      "logps/rejected": -464.0934143066406,
      "loss": 0.5343,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -2.2770252227783203,
      "rewards/margins": 0.9522085189819336,
      "rewards/rejected": -3.229233503341675,
      "step": 40
    },
    {
      "epoch": 0.95,
      "grad_norm": 130.9996197198566,
      "learning_rate": 3.126631330646801e-07,
      "logits/chosen": 123.2393569946289,
      "logits/rejected": 124.50789642333984,
      "logps/chosen": -438.548095703125,
      "logps/rejected": -474.1234436035156,
      "loss": 0.5138,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -2.3258581161499023,
      "rewards/margins": 1.3220884799957275,
      "rewards/rejected": -3.647946834564209,
      "step": 50
    },
    {
      "epoch": 1.14,
      "grad_norm": 56.950942870641875,
      "learning_rate": 2.2891223348923882e-07,
      "logits/chosen": 122.619384765625,
      "logits/rejected": 126.1447525024414,
      "logps/chosen": -414.3634338378906,
      "logps/rejected": -468.19586181640625,
      "loss": 0.2724,
      "rewards/accuracies": 0.893750011920929,
      "rewards/chosen": -2.3773388862609863,
      "rewards/margins": 2.358515501022339,
      "rewards/rejected": -4.735854148864746,
      "step": 60
    },
    {
      "epoch": 1.33,
      "grad_norm": 52.820355390804025,
      "learning_rate": 1.4754491880085317e-07,
      "logits/chosen": 117.16709899902344,
      "logits/rejected": 118.9737319946289,
      "logps/chosen": -387.70526123046875,
      "logps/rejected": -511.97503662109375,
      "loss": 0.1936,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": -2.4186935424804688,
      "rewards/margins": 2.5914835929870605,
      "rewards/rejected": -5.010177135467529,
      "step": 70
    },
    {
      "epoch": 1.52,
      "grad_norm": 51.657826972971314,
      "learning_rate": 7.775827023107834e-08,
      "logits/chosen": 124.15473937988281,
      "logits/rejected": 125.7086181640625,
      "logps/chosen": -446.75421142578125,
      "logps/rejected": -543.6109619140625,
      "loss": 0.1779,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": -2.316882848739624,
      "rewards/margins": 2.962496757507324,
      "rewards/rejected": -5.279379844665527,
      "step": 80
    },
    {
      "epoch": 1.71,
      "grad_norm": 86.34373603352554,
      "learning_rate": 2.7440387297912122e-08,
      "logits/chosen": 107.07579040527344,
      "logits/rejected": 111.74522399902344,
      "logps/chosen": -425.4237365722656,
      "logps/rejected": -509.67718505859375,
      "loss": 0.1765,
      "rewards/accuracies": 0.9437500238418579,
      "rewards/chosen": -2.749206066131592,
      "rewards/margins": 3.0597147941589355,
      "rewards/rejected": -5.8089213371276855,
      "step": 90
    },
    {
      "epoch": 1.9,
      "grad_norm": 51.66215546933828,
      "learning_rate": 2.27878296044029e-09,
      "logits/chosen": 123.38490295410156,
      "logits/rejected": 113.675537109375,
      "logps/chosen": -439.7268981933594,
      "logps/rejected": -550.8162841796875,
      "loss": 0.1923,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": -2.560769557952881,
      "rewards/margins": 3.2135703563690186,
      "rewards/rejected": -5.77433967590332,
      "step": 100
    },
    {
      "epoch": 1.9,
      "eval_logits/chosen": 92.73604583740234,
      "eval_logits/rejected": 86.38631439208984,
      "eval_logps/chosen": -431.5707092285156,
      "eval_logps/rejected": -459.1661682128906,
      "eval_loss": 0.4735770523548126,
      "eval_rewards/accuracies": 0.75,
      "eval_rewards/chosen": -3.4575202465057373,
      "eval_rewards/margins": 1.4980329275131226,
      "eval_rewards/rejected": -4.9555535316467285,
      "eval_runtime": 50.3064,
      "eval_samples_per_second": 14.909,
      "eval_steps_per_second": 0.477,
      "step": 100
    },
    {
      "epoch": 1.97,
      "step": 104,
      "total_flos": 0.0,
      "train_loss": 0.38887147261546207,
      "train_runtime": 1183.8142,
      "train_samples_per_second": 11.404,
      "train_steps_per_second": 0.088
    }
  ],
  "logging_steps": 10,
  "max_steps": 104,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
 }
--- a/training_args.bin
+++ b/training_args.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:8589802c11c0d3f4f0b3f41cc39886efb5aedf91208c7140f796699543e50a29
 size 6136
		`@@ -0,0 +1 @@`
							`{"framework": "pytorch", "task": "text-generation", "allow_remote": true}`