初始化项目,由ModelHub XC社区提供模型

Model: AIDC-AI/Marco-LLM-ES
Source: Original Platform
This commit is contained in:
ModelHub XC
2026-06-06 15:42:13 +08:00
commit abd2e5215f
14 changed files with 455037 additions and 0 deletions

39
.gitattributes vendored Normal file
View File

@@ -0,0 +1,39 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
model-00000-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
model-00001-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
model-00002-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text
model-00003-of-00004.safetensors filter=lfs diff=lfs merge=lfs -text

66
README.md Normal file
View File

@@ -0,0 +1,66 @@
---
language:
- es
pipeline_tag: text-generation
tags:
- pretrained
license: apache-2.0
---
# Marco-LLM-ES-7B
## Introduction
Marco-LLM-ES is a series of enhanced language models specifically fine-tuned for common languages used in Spain, including Catalan, Basque, Galician, and Spanish. This repository contains the 7B Marco-LLM-ES base language model.
Compared with state-of-the-art open-source language models, Marco-LLM-ES has additionally undergone extensive continued pretraining on a dataset of approximately 50 billion tokens, which enhances its capabilities in the targeted languages while keeping it competitive on general benchmarks.
For more details, please refer to our [Hugging Face page](https://huggingface.co/AIDC-AI/Marco-LLM-ES).
## Model Details
The Marco-LLM-ES series includes models of varying sizes, from 7B to 72B parameters, including both base and instruction-tuned (Instruct) models. The models are based on the Transformer architecture with SwiGLU activation, attention QKV bias, and grouped-query attention. Additionally, the models employ an improved tokenizer adapted to multiple languages.
## Usage
It is not advised to use the base language models for direct text generation tasks. Instead, it is recommended to apply post-training methods such as Supervised Fine-tuning (SFT), Reinforcement Learning from Human Feedback (RLHF), or continued pretraining to adapt the models for specific use cases.
### Performance
The evaluation of Marco-LLM-ES models focuses on performance in natural language understanding, general question answering, coding, mathematics, scientific knowledge, reasoning, multilingual capability, and their enhanced performance in the targeted languages: Catalan, Basque, Galician, and Spanish.
The datasets used for evaluation include:
**Spanish-specific Tasks**: Evaluations in Catalan, Basque, Galician, and Spanish on LaLeaderboard (5-shot).
#### Marco-LLM-ES-7B performance
| Datasets | Marco-LLM-ES-7B |
| :---------------- | :-----------------: |
| Spanish | **44.49** |
| Catalan | **39.45** |
| Basque | **28.66** |
| Galician | **24.04** |
| Average | **34.16** |
## Citation
If you find our work helpful, please give us a citation.
```
@article{unique_identifier,
title={Marco-LLM: Bridging Languages via Massive Multilingual Training for Cross-Lingual Enhancement},
journal={arXiv},
volume={},
number={2412.04003},
year={2024},
url={https://arxiv.org/abs/2412.04003}
}
```

27
config.json Normal file
View File

@@ -0,0 +1,27 @@
{
"architectures": [
"Qwen2ForCausalLM"
],
"attention_dropout": 0.0,
"bos_token_id": 151643,
"eos_token_id": 151643,
"hidden_act": "silu",
"hidden_size": 3584,
"initializer_range": 0.02,
"intermediate_size": 18944,
"max_position_embeddings": 131072,
"max_window_layers": 28,
"model_type": "qwen2",
"num_attention_heads": 28,
"num_hidden_layers": 28,
"num_key_value_heads": 4,
"rms_norm_eps": 1e-06,
"rope_theta": 1000000.0,
"sliding_window": 131072,
"tie_word_embeddings": false,
"torch_dtype": "bfloat16",
"transformers_version": "4.37.2",
"use_cache": true,
"use_sliding_window": false,
"vocab_size": 152064
}

1
configuration.json Normal file
View File

@@ -0,0 +1 @@
{"framework": "pytorch", "task": "others", "allow_remote": true}

7
generation_config.json Normal file
View File

@@ -0,0 +1,7 @@
{
"bos_token_id": 151643,
"do_sample": false,
"eos_token_id": 151643,
"max_new_tokens": 2048,
"transformers_version": "4.37.0"
}

151387
merges.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e77bee75a8c93efaa5a0a202997573412f61f2ef6d8139d2faf52ab7df2d46ef
size 3875705088

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:7ebe54df7334c1ecc46757e52374c2168f6e0294d16e17e5da51249f1758ac70
size 4444566848

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9a919d27f9962974172847ab360be9f9ae3d5d94f4b5f2e80b627d8632a32cc2
size 4224367056

View File

@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:344f94a8fbc5d32a71d5ac94a2021db506d2dc8c3a503cd5cea74291dfec9c8f
size 2686632840

View File

@@ -0,0 +1,346 @@
{
"metadata": {
"total_size": 15231233024
},
"weight_map": {
"model.layers.19.self_attn.o_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.19.post_attention_layernorm.weight": "model-00000-of-00004.safetensors",
"model.layers.20.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.20.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.20.self_attn.v_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.20.mlp.gate_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.20.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.20.mlp.down_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.21.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.21.self_attn.k_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.21.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.21.self_attn.o_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.21.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.21.input_layernorm.weight": "model-00000-of-00004.safetensors",
"model.layers.21.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.22.self_attn.k_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.22.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.22.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.22.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.22.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.22.mlp.down_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.23.self_attn.v_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.23.self_attn.v_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.23.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.23.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.23.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.23.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.23.post_attention_layernorm.weight": "model-00000-of-00004.safetensors",
"model.layers.24.self_attn.q_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.24.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.24.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.24.self_attn.k_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.24.self_attn.v_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.24.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.24.self_attn.o_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.24.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.24.mlp.down_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.24.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.24.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.25.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.25.self_attn.k_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.25.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.25.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.25.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.26.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.26.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.26.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.26.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.26.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.26.mlp.down_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.26.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.27.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.27.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.27.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.27.self_attn.v_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.27.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.27.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.27.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.norm.weight": "model-00001-of-00004.safetensors",
"lm_head.weight": "model-00002-of-00004.safetensors",
"model.embed_tokens.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.0.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.0.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.0.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.0.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.0.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.0.mlp.down_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.0.post_attention_layernorm.weight": "model-00000-of-00004.safetensors",
"model.layers.1.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.1.self_attn.k_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.1.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.1.mlp.gate_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.1.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.1.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.1.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.2.self_attn.k_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.2.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.2.self_attn.v_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.2.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.2.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.2.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.2.post_attention_layernorm.weight": "model-00000-of-00004.safetensors",
"model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.3.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.3.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.3.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.3.self_attn.v_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.3.self_attn.o_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.3.mlp.gate_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.3.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.3.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.3.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.4.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.4.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.4.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.4.mlp.down_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.4.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.5.self_attn.q_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.5.self_attn.v_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.5.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.5.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.5.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.6.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.6.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.6.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.6.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.6.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.7.self_attn.q_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.7.self_attn.v_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.7.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.7.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.7.post_attention_layernorm.weight": "model-00000-of-00004.safetensors",
"model.layers.8.self_attn.q_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.8.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.8.self_attn.k_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.8.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.8.self_attn.o_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.8.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.8.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.9.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.9.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.9.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.9.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.9.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.9.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.9.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.9.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.9.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.10.self_attn.k_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.10.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.10.mlp.gate_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.10.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.10.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.q_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.11.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.self_attn.k_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.11.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.11.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.11.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.11.mlp.down_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.11.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.12.self_attn.q_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.12.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.12.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.12.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.12.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.12.self_attn.v_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.12.self_attn.o_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.12.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.12.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.12.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.12.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.13.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.13.mlp.gate_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.13.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.13.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.13.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.14.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.14.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.14.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.14.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.14.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.14.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.14.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.15.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.15.self_attn.k_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.15.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.15.self_attn.v_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.15.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.15.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.15.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.16.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.16.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
"model.layers.16.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.16.self_attn.k_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.16.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.16.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.16.input_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.16.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.17.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.17.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
"model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.17.mlp.down_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors",
"model.layers.17.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.18.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
"model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
"model.layers.18.self_attn.v_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.mlp.gate_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.18.mlp.up_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors",
"model.layers.18.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
"model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
"model.layers.19.self_attn.q_proj.bias": "model-00000-of-00004.safetensors",
"model.layers.19.self_attn.k_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
"model.layers.19.self_attn.v_proj.weight": "model-00000-of-00004.safetensors",
"model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors"
}
}

303111
tokenizer.json Normal file

File diff suppressed because it is too large Load Diff

40
tokenizer_config.json Normal file
View File

@@ -0,0 +1,40 @@
{
"add_prefix_space": false,
"added_tokens_decoder": {
"151643": {
"content": "<|endoftext|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151644": {
"content": "<|im_start|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
},
"151645": {
"content": "<|im_end|>",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
}
},
"additional_special_tokens": ["<|im_start|>", "<|im_end|>"],
"bos_token": null,
"chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
"clean_up_tokenization_spaces": false,
"eos_token": "<|endoftext|>",
"errors": "replace",
"model_max_length": 32768,
"pad_token": "<|endoftext|>",
"split_special_tokens": false,
"tokenizer_class": "Qwen2Tokenizer",
"unk_token": null
}

1
vocab.json Normal file

File diff suppressed because one or more lines are too long