commit 319e9d6685195dfed937becc3441b7bfa1c6f58a Author: ModelHub XC Date: Thu May 14 08:02:48 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: elyza/ELYZA-Shortcut-1.0-Qwen-7B Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..d2f31f8 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,37 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +key_visual.png filter=lfs diff=lfs merge=lfs -text diff 
--git a/README.md b/README.md new file mode 100644 index 0000000..84358ff --- /dev/null +++ b/README.md @@ -0,0 +1,96 @@ +--- +base_model: +- Qwen/Qwen2.5-7B-Instruct +library_name: transformers +license: apache-2.0 +language: +- ja +- en +--- +# ELYZA-Shortcut-1.0-Qwen-7B + +![ELYZA-Shortcut-1.0-Qwen-7B-image](./key_visual.png) + +## Model Description + +**ELYZA-Shortcut-1.0-Qwen-7B** is a non-reasoning model derived during the development of the reasoning model [ELYZA-Thinking-1.0-Qwen-32B](https://huggingface.co/elyza/ELYZA-Thinking-1.0-Qwen-32B). Based on [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct), this model has been post-trained to bypass the step-by-step reasoning process and directly generate the final answer (**Built with Qwen**). + +During the post-training phase, the model was trained via supervised fine-tuning (SFT) using problem-solution pairs. These pairs were obtained by removing reasoning steps from optimal reasoning paths explored through a Monte Carlo Tree Search (MCTS)-based algorithm. For more details, please refer to [our blog post](https://zenn.dev/elyza/articles/bc68f53fc0a83b). + +## Usage + +You can use the model with the [Hugging Face Transformers](https://huggingface.co/docs/transformers/index) library. The following code is an example of how to use the model for inference. 
+ +```python +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer + +model_name = "elyza/ELYZA-Shortcut-1.0-Qwen-7B" +prompt = "仕事の熱意を取り戻すためのアイデアを5つ挙げてください。" + +tokenizer = AutoTokenizer.from_pretrained(model_name) +model = AutoModelForCausalLM.from_pretrained( + model_name, + torch_dtype="auto", + device_map="auto", +) +model.eval() +messages = [{"role": "user", "content": prompt}] +input_text = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True +) +token_ids = tokenizer.encode( + input_text, add_special_tokens=False, return_tensors="pt" +) +with torch.no_grad(): + output_ids = model.generate( + token_ids.to(model.device), + max_new_tokens=8192, + do_sample=True, + temperature=0.6, + top_p=0.95, + ) +output = tokenizer.decode( + output_ids.tolist()[0][token_ids.size(1):], skip_special_tokens=True +) +print(output) +``` + +For deployment, [vLLM](https://docs.vllm.ai/en/latest/) is recommended to create an OpenAI-Compatible Server. 
+ +```bash +vllm serve elyza/ELYZA-Shortcut-1.0-Qwen-7B \ + --max-model-len 32768 +``` + +## How to Cite + +```tex +@misc{elyza2025thinking, + title={elyza/ELYZA-Thinking-1.0-Qwen-32B}, + url={https://huggingface.co/elyza/ELYZA-Thinking-1.0-Qwen-32B}, + author={Masato Hirakawa and Tomoaki Nakamura and Akira Sasaki and Daisuke Oba and Shoetsu Sato}, + year={2025}, +} +``` + +## Citations + +```tex +@misc{qwen2.5, + title = {Qwen2.5: A Party of Foundation Models}, + url = {https://qwenlm.github.io/blog/qwen2.5/}, + author = {Qwen Team}, + month = {September}, + year = {2024} +} + +@article{qwen2, + title={Qwen2 Technical Report}, + author={An Yang and Baosong Yang and Binyuan Hui and Bo Zheng and Bowen Yu and Chang Zhou and Chengpeng Li and Chengyuan Li and Dayiheng Liu and Fei Huang and Guanting Dong and Haoran Wei and Huan Lin and Jialong Tang and Jialin Wang and Jian Yang and Jianhong Tu and Jianwei Zhang and Jianxin Ma and Jin Xu and Jingren Zhou and Jinze Bai and Jinzheng He and Junyang Lin and Kai Dang and Keming Lu and Keqin Chen and Kexin Yang and Mei Li and Mingfeng Xue and Na Ni and Pei Zhang and Peng Wang and Ru Peng and Rui Men and Ruize Gao and Runji Lin and Shijie Wang and Shuai Bai and Sinan Tan and Tianhang Zhu and Tianhao Li and Tianyu Liu and Wenbin Ge and Xiaodong Deng and Xiaohuan Zhou and Xingzhang Ren and Xinyu Zhang and Xipin Wei and Xuancheng Ren and Yang Fan and Yang Yao and Yichang Zhang and Yu Wan and Yunfei Chu and Yuqiong Liu and Zeyu Cui and Zhenru Zhang and Zhihao Fan}, + journal={arXiv preprint arXiv:2407.10671}, + year={2024} +} +``` diff --git a/config.json b/config.json new file mode 100644 index 0000000..98c9ef6 --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "_name_or_path": "/home/shared/models/hf/elyza/Qwen2.5-7B-Instruct-ja60B-shortcut_checkpoint-4000", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 
3584, + "initializer_range": 0.02, + "intermediate_size": 18944, + "max_position_embeddings": 32768, + "max_window_layers": 28, + "model_type": "qwen2", + "num_attention_heads": 28, + "num_hidden_layers": 28, + "num_key_value_heads": 4, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.43.3", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..42b0c0e --- /dev/null +++ b/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "4.46.2" +} diff --git a/key_visual.png b/key_visual.png new file mode 100644 index 0000000..23152c0 --- /dev/null +++ b/key_visual.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:028cc064d9f27cd1e53921b0f4ae59d3f24f743f4c798162ac6acc39094b7674 +size 273777 diff --git a/mergekit_config.yml b/mergekit_config.yml new file mode 100644 index 0000000..5917f8e --- /dev/null +++ b/mergekit_config.yml @@ -0,0 +1,13 @@ +base_model: /home/shared/models/hf/elyza/Qwen2.5-7B-Instruct-ja60B-shortcut_checkpoint-4000 +dtype: bfloat16 +merge_method: linear +slices: +- sources: + - layer_range: [0, 28] + model: /home/shared/models/hf/elyza/Qwen2.5-7B-Instruct-ja60B-shortcut_checkpoint-4000 + parameters: + weight: 0.5 + - layer_range: [0, 28] + model: Qwen/Qwen2.5-7B-Instruct + parameters: + weight: 0.5 diff --git a/model-00001-of-00004.safetensors b/model-00001-of-00004.safetensors new file mode 100644 index 0000000..e7782a8 --- /dev/null +++ b/model-00001-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:aaf6263eb93d6215c37218301130c70e584a94223bbe44f59b01e3ed8fe947a0 +size 4976698776 diff --git a/model-00002-of-00004.safetensors b/model-00002-of-00004.safetensors new file mode 100644 index 0000000..d538737 --- /dev/null +++ b/model-00002-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42779e640151ab9e49bbc81ea64dbc0ec99b9580b9f14b8d0c9a27b5231b2b31 +size 4932751032 diff --git a/model-00003-of-00004.safetensors b/model-00003-of-00004.safetensors new file mode 100644 index 0000000..1ebd029 --- /dev/null +++ b/model-00003-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:137a6398a85ebe7ce02d3ed68fb75c58b61c7c543ce51490af201056f0fbb333 +size 4991495808 diff --git a/model-00004-of-00004.safetensors b/model-00004-of-00004.safetensors new file mode 100644 index 0000000..428d431 --- /dev/null +++ b/model-00004-of-00004.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d149ff3cc0dd567318e30d188eff183db0db15fd3b2cb3df66ecd22b66880d3 +size 330326240 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..028aee5 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1 @@ +{"metadata": {"mergekit_version": "0.0.4.4", "total_size": 15231233024}, "weight_map": {"lm_head.weight": "model-00001-of-00004.safetensors", "model.embed_tokens.weight": "model-00001-of-00004.safetensors", "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors", "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", "model.layers.0.self_attn.k_proj.weight": 
"model-00001-of-00004.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors", "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", "model.layers.10.input_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.10.mlp.down_proj.weight": "model-00001-of-00004.safetensors", "model.layers.10.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", "model.layers.10.mlp.up_proj.weight": "model-00001-of-00004.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", 
"model.layers.10.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", "model.layers.11.input_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.11.mlp.down_proj.weight": "model-00001-of-00004.safetensors", "model.layers.11.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", "model.layers.11.mlp.up_proj.weight": "model-00001-of-00004.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", "model.layers.12.input_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.12.mlp.down_proj.weight": "model-00001-of-00004.safetensors", "model.layers.12.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", "model.layers.12.mlp.up_proj.weight": "model-00001-of-00004.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", "model.layers.12.self_attn.q_proj.weight": 
"model-00001-of-00004.safetensors", "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", "model.layers.13.input_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.13.mlp.down_proj.weight": "model-00001-of-00004.safetensors", "model.layers.13.mlp.gate_proj.weight": "model-00001-of-00004.safetensors", "model.layers.13.mlp.up_proj.weight": "model-00001-of-00004.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00004.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00004.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00001-of-00004.safetensors", "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00004.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00004.safetensors", "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00004.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00004.safetensors", "model.layers.14.input_layernorm.weight": "model-00001-of-00004.safetensors", "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors", "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", 
"model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors", "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors", "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", "model.layers.17.input_layernorm.weight": 
"model-00002-of-00004.safetensors", "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors", "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", "model.layers.18.input_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.18.mlp.down_proj.weight": "model-00002-of-00004.safetensors", "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", "model.layers.19.input_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.19.mlp.down_proj.weight": "model-00002-of-00004.safetensors", 
"model.layers.19.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", "model.layers.19.mlp.up_proj.weight": "model-00002-of-00004.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.19.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", "model.layers.19.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", "model.layers.19.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", "model.layers.2.input_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.2.mlp.down_proj.weight": "model-00002-of-00004.safetensors", "model.layers.2.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", "model.layers.2.mlp.up_proj.weight": "model-00002-of-00004.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.2.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", "model.layers.2.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", "model.layers.2.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", "model.layers.20.input_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.20.mlp.down_proj.weight": "model-00002-of-00004.safetensors", "model.layers.20.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", "model.layers.20.mlp.up_proj.weight": 
"model-00002-of-00004.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.20.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", "model.layers.20.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", "model.layers.20.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", "model.layers.20.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", "model.layers.21.input_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.21.mlp.down_proj.weight": "model-00002-of-00004.safetensors", "model.layers.21.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", "model.layers.21.mlp.up_proj.weight": "model-00002-of-00004.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.21.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", "model.layers.21.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", "model.layers.21.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", "model.layers.22.input_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.22.mlp.down_proj.weight": "model-00002-of-00004.safetensors", "model.layers.22.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", "model.layers.22.mlp.up_proj.weight": "model-00002-of-00004.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00002-of-00004.safetensors", 
"model.layers.22.self_attn.k_proj.bias": "model-00002-of-00004.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00002-of-00004.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00002-of-00004.safetensors", "model.layers.22.self_attn.q_proj.bias": "model-00002-of-00004.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00002-of-00004.safetensors", "model.layers.22.self_attn.v_proj.bias": "model-00002-of-00004.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00002-of-00004.safetensors", "model.layers.23.input_layernorm.weight": "model-00002-of-00004.safetensors", "model.layers.23.mlp.down_proj.weight": "model-00002-of-00004.safetensors", "model.layers.23.mlp.gate_proj.weight": "model-00002-of-00004.safetensors", "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors", "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", "model.layers.24.self_attn.k_proj.weight": 
"model-00003-of-00004.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors", "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors", "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", 
"model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors", "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", "model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", "model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", "model.layers.3.input_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.3.mlp.down_proj.weight": "model-00003-of-00004.safetensors", "model.layers.3.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", "model.layers.3.mlp.up_proj.weight": "model-00003-of-00004.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.3.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", "model.layers.3.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", "model.layers.3.self_attn.q_proj.weight": 
"model-00003-of-00004.safetensors", "model.layers.3.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", "model.layers.4.input_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.4.mlp.down_proj.weight": "model-00003-of-00004.safetensors", "model.layers.4.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", "model.layers.4.mlp.up_proj.weight": "model-00003-of-00004.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.4.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", "model.layers.4.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", "model.layers.4.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", "model.layers.5.input_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.5.mlp.down_proj.weight": "model-00003-of-00004.safetensors", "model.layers.5.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", "model.layers.5.mlp.up_proj.weight": "model-00003-of-00004.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.5.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", "model.layers.5.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", "model.layers.5.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", 
"model.layers.5.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", "model.layers.6.input_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.6.mlp.down_proj.weight": "model-00003-of-00004.safetensors", "model.layers.6.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", "model.layers.6.mlp.up_proj.weight": "model-00003-of-00004.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.6.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", "model.layers.6.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", "model.layers.6.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", "model.layers.7.input_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.7.mlp.down_proj.weight": "model-00003-of-00004.safetensors", "model.layers.7.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", "model.layers.7.mlp.up_proj.weight": "model-00003-of-00004.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.7.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", "model.layers.7.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", "model.layers.7.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", "model.layers.8.input_layernorm.weight": 
"model-00003-of-00004.safetensors", "model.layers.8.mlp.down_proj.weight": "model-00003-of-00004.safetensors", "model.layers.8.mlp.gate_proj.weight": "model-00003-of-00004.safetensors", "model.layers.8.mlp.up_proj.weight": "model-00003-of-00004.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.8.self_attn.k_proj.bias": "model-00003-of-00004.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00003-of-00004.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00003-of-00004.safetensors", "model.layers.8.self_attn.q_proj.bias": "model-00003-of-00004.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00003-of-00004.safetensors", "model.layers.8.self_attn.v_proj.bias": "model-00003-of-00004.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00003-of-00004.safetensors", "model.layers.9.input_layernorm.weight": "model-00003-of-00004.safetensors", "model.layers.9.mlp.down_proj.weight": "model-00003-of-00004.safetensors", "model.layers.9.mlp.gate_proj.weight": "model-00004-of-00004.safetensors", "model.layers.9.mlp.up_proj.weight": "model-00004-of-00004.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00004-of-00004.safetensors", "model.layers.9.self_attn.k_proj.bias": "model-00004-of-00004.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00004-of-00004.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00004-of-00004.safetensors", "model.layers.9.self_attn.q_proj.bias": "model-00004-of-00004.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00004-of-00004.safetensors", "model.layers.9.self_attn.v_proj.bias": "model-00004-of-00004.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00004-of-00004.safetensors", "model.norm.weight": "model-00004-of-00004.safetensors"}} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 
0000000..ac23c0a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,31 @@ +{ + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..51ebb3b --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa +size 11421896 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..895a05f --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,208 @@ +{ + "add_bos_token": false, + "add_prefix_space": false, + "added_tokens_decoder": { + "151643": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151645": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151646": { + "content": "<|object_ref_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|object_ref_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151648": { + "content": "<|box_start|>", + "lstrip": 
false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151649": { + "content": "<|box_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + 
"rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [ + "<|im_start|>", + "<|im_end|>", + "<|object_ref_start|>", + "<|object_ref_end|>", + "<|box_start|>", + "<|box_end|>", + "<|quad_start|>", + "<|quad_end|>", + "<|vision_start|>", + "<|vision_end|>", + "<|vision_pad|>", + "<|image_pad|>", + "<|video_pad|>" + ], + "bos_token": null, + "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within XML tags:\\n\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n\\n\\nFor each function call, return a json object with function name and arguments within XML tags:\\n\\n{\\\"name\\\": , \\\"arguments\\\": }\\n<|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n\\n' }}\n {{- message.content }}\n {{- '\\n' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "extra_special_tokens": {}, + "model_max_length": 131072, + "pad_token": "<|endoftext|>", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +}