初始化项目，由ModelHub XC社区提供模型

Model: liminerity/mm4-3b Source: Original Platform
2026-06-13 11:56:16 +08:00
commit d3ee1f240f
10 changed files with 677 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,35 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
--- a/README.md
+++ b/README.md
@@ -0,0 +1,143 @@
 ---
 license: apache-2.0
 datasets:
 - teknium/GPT4-LLM-Cleaned
 - vicgalle/alpaca-gpt4
 model-index:
 - name: mm4-3b
  results:
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: AI2 Reasoning Challenge (25-Shot)
      type: ai2_arc
      config: ARC-Challenge
      split: test
      args:
        num_few_shot: 25
    metrics:
    - type: acc_norm
      value: 44.8
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=liminerity/mm4-3b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: HellaSwag (10-Shot)
      type: hellaswag
      split: validation
      args:
        num_few_shot: 10
    metrics:
    - type: acc_norm
      value: 70.41
      name: normalized accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=liminerity/mm4-3b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: MMLU (5-Shot)
      type: cais/mmlu
      config: all
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 50.9
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=liminerity/mm4-3b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: TruthfulQA (0-shot)
      type: truthful_qa
      config: multiple_choice
      split: validation
      args:
        num_few_shot: 0
    metrics:
    - type: mc2
      value: 43.2
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=liminerity/mm4-3b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: Winogrande (5-shot)
      type: winogrande
      config: winogrande_xl
      split: validation
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 66.22
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=liminerity/mm4-3b
      name: Open LLM Leaderboard
  - task:
      type: text-generation
      name: Text Generation
    dataset:
      name: GSM8k (5-shot)
      type: gsm8k
      config: main
      split: test
      args:
        num_few_shot: 5
    metrics:
    - type: acc
      value: 43.82
      name: accuracy
    source:
      url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=liminerity/mm4-3b
      name: Open LLM Leaderboard
 ---
 MM4-3b
 A Llama-based model I made through extensive training and merging.
 I'll explain later; I literally made so many models today.
 Title: Divergent Knowledge Enhancement through Retrograde Merging Strategies: Redefining Accuracy Perspectives in Language Model Evolution
 Abstract: Have you picked up any bad habits, or have you ever learned to do something incorrectly, only to realize you must completely relearn whatever it is you're trying to accomplish? In this proposal, we present an innovative and unconventional approach to enhancing the performance and knowledge base of natural language models. Our proposed method, titled 'Divergent Knowledge Enhancement through Retrograde Merging Strategies' (DKE-RS), aims to challenge traditional practices in model development by incorporating a deliberate back-and-forth merger between high- and low-accuracy language models.
 The initial conceptualization of DKE-RS stemmed from the realization that learning often encompasses both acquisition and unlearning, as encapsulated by the quote, "learning is just as sacred as unlearning." The proposed technique commences with a baseline model, 'blur-7b,' which attains an accuracy rate of 72.1% and is subsequently merged with a Mistral model fine-tuned on the Dolphin dataset that achieves only 46% accuracy.
 By deliberately merging with less accurate models and retracing the evolutionary process, DKE-RS aims to broaden the knowledge base of the resulting model. This strategy, dubbed 'making the bad good,' intentionally degrades the initial accuracy in an effort to refine it, thus breaking conventional iterative improvements for innovative progression.
 image/png
 The DKE-RS method challenges the status quo by not solely relying on a linear enhancement trajectory, instead adopting a more holistic and diverse approach. We anticipate that this non-linear merger process will further diversify the model's knowledge base, thereby creating a more resilient and well-rounded language generation tool, capable of handling complex contexts with a broader understanding.
 Through thorough experimentation and analysis, we plan to assess the effectiveness and potential drawbacks of DKE-RS, comparing it to traditional merging techniques. The results from such evaluations will provide valuable insights into the efficacy of this divergent strategy in the landscape of natural language model development.
 We posit that the Divergent Knowledge Enhancement through Retrograde Merging Strategies approach contributes a significant and compelling step forward in the field, prompting thought-provoking discourse about the nature of accuracy refinement and model progression.
 # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard)
 Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_liminerity__mm4-3b)
 |             Metric              |Value|
 |---------------------------------|----:|
 |Avg.                             |53.22|
 |AI2 Reasoning Challenge (25-Shot)|44.80|
 |HellaSwag (10-Shot)              |70.41|
 |MMLU (5-Shot)                    |50.90|
 |TruthfulQA (0-shot)              |43.20|
 |Winogrande (5-shot)              |66.22|
 |GSM8k (5-shot)                   |43.82|
--- a/config.json
+++ b/config.json
@@ -0,0 +1,31 @@
 {
  "_name_or_path": "liminerity/mm1-slerp-3b",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "dim_model_base": 256,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 2304,
  "initializer_range": 0.1,
  "intermediate_size": 5760,
  "max_position_embeddings": 2048,
  "model_type": "llama",
  "num_attention_heads": 36,
  "num_hidden_layers": 40,
  "num_key_value_heads": 36,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "scale_depth": 1.4,
  "scale_emb": 12,
  "tie_word_embeddings": false,
  "torch_dtype": "float16",
  "transformers_version": "4.39.0.dev0",
  "use_cache": false,
  "vocab_size": 122753
 }
--- a/generation_config.json
+++ b/generation_config.json
@@ -0,0 +1,8 @@
 {
  "_from_model_config": true,
  "bos_token_id": 1,
  "do_sample": true,
  "eos_token_id": 2,
  "transformers_version": "4.39.0.dev0",
  "use_cache": false
 }
--- a/pytorch_model-00001-of-00002.bin
+++ b/pytorch_model-00001-of-00002.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:db71717af27dd1e4197c549e628a1d1a40367f384a5ef8ebaba28e9a07ee652c
 size 4993313262
--- a/pytorch_model-00002-of-00002.bin
+++ b/pytorch_model-00002-of-00002.bin
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:0587d12d0053e0eac9de6a8ad440b62da526d3e07ed87aa58d8570603461635d
 size 1022223648
--- a/pytorch_model.bin.index.json
+++ b/pytorch_model.bin.index.json
@@ -0,0 +1,370 @@
 {
  "metadata": {
    "total_size": 6015407616
  },
  "weight_map": {
    "lm_head.weight": "pytorch_model-00002-of-00002.bin",
    "model.embed_tokens.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.15.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.21.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.21.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.21.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.22.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.22.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.22.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.22.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.23.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.23.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.23.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.23.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.24.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.24.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.24.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.24.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.24.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.24.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.24.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.24.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.24.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.25.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.25.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.25.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.25.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.25.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.25.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.25.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.25.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.25.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.26.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.26.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.26.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.26.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.26.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.26.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.26.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.26.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.26.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.27.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.27.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.27.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.27.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.27.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.27.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.27.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.27.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.27.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.28.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.28.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.28.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.28.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.28.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.28.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.28.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.28.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.28.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.29.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.29.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.29.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.29.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.29.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.29.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.29.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.29.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.29.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.30.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.30.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.30.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.30.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.30.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.30.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.30.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.30.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.30.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.31.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.31.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.31.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.31.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.31.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.31.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.31.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.31.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.31.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.32.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.32.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.32.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.32.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.32.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.32.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.32.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.32.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.32.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.33.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.33.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.33.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.33.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.33.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.33.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.33.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.33.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.33.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.34.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.34.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.34.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.34.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.34.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.34.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.34.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.34.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.34.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.35.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.35.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.35.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.35.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.35.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.35.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.35.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.35.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.35.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.36.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.36.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.36.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.36.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.36.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.36.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.36.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.36.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.36.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.37.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.37.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.37.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.37.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.37.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.37.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.37.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.37.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.37.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.38.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.38.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.38.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.38.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.38.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.38.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.38.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.38.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.38.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.39.input_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.39.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.39.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.39.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.39.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.39.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.39.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.39.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.39.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin",
    "model.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin",
    "model.norm.weight": "pytorch_model-00002-of-00002.bin"
  }
 }
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,30 @@
 {
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/tokenizer.model
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:c9aafcd7da1f5611dab6be545db74d5552a2ccc9c2a12c72ea7be63aac4a25d7
 size 1994871
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,51 @@
 {
  "add_bos_token": true,
  "add_eos_token": false,
  "add_prefix_space": true,
  "added_tokens_decoder": {
    "0": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{'<用户>' + message['content'].strip() + '<AI>'}}{% else %}{{message['content'].strip()}}{% endif %}{% endfor %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "</s>",
  "legacy": true,
  "max_length": 512,
  "model_max_length": 1000000000000000019884624838656,
  "pad_to_multiple_of": null,
  "pad_token": "</s>",
  "pad_token_type_id": 0,
  "padding_side": "left",
  "sp_model_kwargs": {},
  "spaces_between_special_tokens": false,
  "stride": 0,
  "tokenizer_class": "LlamaTokenizer",
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "<unk>",
  "use_default_system_prompt": false,
  "use_fast": true
 }