初始化项目，由ModelHub XC社区提供模型

Model: BAAI/bge-code-v1 Source: Original Platform
2026-05-14 14:33:23 +08:00
commit b631246acd
17 changed files with 152581 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,36 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
 tokenizer.json filter=lfs diff=lfs merge=lfs -text
--- a/1_Pooling/config.json
+++ b/1_Pooling/config.json
@@ -0,0 +1,10 @@
 {
  "word_embedding_dimension": 1536,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": false,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": true,
  "include_prompt": true
 }
--- a/README.md
+++ b/README.md
@@ -0,0 +1,213 @@
 ---
 language:
 - zh
 - en
 tags:
 - sentence-transformers
 - sentence-similarity
 - feature-extraction
 - transformers
 pipeline_tag: sentence-similarity
 library_name: sentence-transformers
 license: apache-2.0
 ---
 <h1 align="center">FlagEmbedding</h1>
 For more details, please refer to our GitHub repository: [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding).
 **BGE-Code-v1** is an LLM-based code embedding model that supports code retrieval, text retrieval, and multilingual retrieval. It primarily demonstrates the following capabilities:
 - Superior Code Retrieval Performance: The model demonstrates exceptional code retrieval capabilities, supporting natural language queries in both English and Chinese, as well as 20 programming languages.
 - Robust Text Retrieval Capabilities: The model maintains strong text retrieval capabilities comparable to text embedding models of similar scale.
 - Extensive Multilingual Support: BGE-Code-v1 offers comprehensive multilingual retrieval capabilities, excelling in languages such as English, Chinese, Japanese, French, and more.
 ## Usage
 ### Using FlagEmbedding
 ```
 git clone https://github.com/FlagOpen/FlagEmbedding.git
 cd FlagEmbedding
 pip install -e .
 ```
 ```python
 from FlagEmbedding import FlagLLMModel
 queries = [
    "Delete the record with ID 4 from the 'Staff' table.", 
    'Delete all records in the "Livestock" table where age is greater than 5'
 ]
 documents = [
    "DELETE FROM Staff WHERE StaffID = 4;",
    "DELETE FROM Livestock WHERE age > 5;"
 ]
 model = FlagLLMModel('BAAI/bge-code-v1', 
                     query_instruction_format="<instruct>{}\n<query>{}",
                     query_instruction_for_retrieval="Given a question in text, retrieve SQL queries that are appropriate responses to the question.",
                     trust_remote_code=True,
                     use_fp16=True) # Setting use_fp16 to True speeds up computation with a slight performance degradation
 embeddings_1 = model.encode_queries(queries)
 embeddings_2 = model.encode_corpus(documents)
 similarity = embeddings_1 @ embeddings_2.T
 print(similarity)
 ```
 By default, FlagLLMModel will use all available GPUs when encoding. Please set `os.environ["CUDA_VISIBLE_DEVICES"]` to select specific GPUs. You can also set `os.environ["CUDA_VISIBLE_DEVICES"]=""` to make all GPUs unavailable.
 ### Using Sentence Transformers
 ```python
 from sentence_transformers import SentenceTransformer
 import torch
 # Load the model, optionally in float16 precision for faster inference
 model = SentenceTransformer(
    "BAAI/bge-code-v1",
    trust_remote_code=True,
    model_kwargs={"torch_dtype": torch.float16},
 )
 # Prepare a prompt given an instruction
 instruction = 'Given a question in text, retrieve SQL queries that are appropriate responses to the question.'
 prompt = f'<instruct>{instruction}\n<query>'
 # Prepare queries and documents
 queries = [
    "Delete the record with ID 4 from the 'Staff' table.", 
    'Delete all records in the "Livestock" table where age is greater than 5'
 ]
 documents = [
    "DELETE FROM Staff WHERE StaffID = 4;",
    "DELETE FROM Livestock WHERE age > 5;"
 ]
 # Compute the query and document embeddings
 query_embeddings = model.encode(queries, prompt=prompt)
 document_embeddings = model.encode(documents)
 # Compute the cosine similarity between the query and document embeddings
 similarities = model.similarity(query_embeddings, document_embeddings)
 print(similarities)
 ```
 ### Using HuggingFace Transformers
 ```python
 import torch
 import torch.nn.functional as F
 from torch import Tensor
 from transformers import AutoTokenizer, AutoModel
 def last_token_pool(last_hidden_states: Tensor,
                 attention_mask: Tensor) -> Tensor:
    left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
    if left_padding:
        return last_hidden_states[:, -1]
    else:
        sequence_lengths = attention_mask.sum(dim=1) - 1
        batch_size = last_hidden_states.shape[0]
        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
 def get_detailed_instruct(task_description: str, query: str) -> str:
    return f'<instruct>{task_description}\n<query>{query}'
 instruction = 'Given a question in text, retrieve SQL queries that are appropriate responses to the question.'
 queries = [
    "Delete the record with ID 4 from the 'Staff' table.", 
    'Delete all records in the "Livestock" table where age is greater than 5'
 ]
 documents = [
    "DELETE FROM Staff WHERE StaffID = 4;",
    "DELETE FROM Livestock WHERE age > 5;"
 ]
 input_texts = [get_detailed_instruct(instruction, query) for query in queries] + documents
 tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-code-v1', trust_remote_code=True)
 model = AutoModel.from_pretrained('BAAI/bge-code-v1', trust_remote_code=True)
 model.eval()
 max_length = 4096
 # Tokenize the input texts
 batch_dict = tokenizer(input_texts, max_length=max_length, padding=True, truncation=True, return_tensors='pt', pad_to_multiple_of=8)
 with torch.no_grad():
    outputs = model(**batch_dict)
    embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
 # normalize embeddings
 embeddings = F.normalize(embeddings, p=2, dim=1)
 scores = (embeddings[:2] @ embeddings[2:].T) * 100
 print(scores.tolist())
 ```
 ## Evaluation
 **BGE-Code-v1** achieves state-of-the-art performance on both the CoIR and CodeRAG benchmarks.
 - CoIR
 |                                 | CodeXEmbed-2B | CodeXEmbed-7B | Voyage-Code-002 | Voyage-Code-003 | BGE-Code-v1 |
 |---------------------------------------|---------------|---------------|-----------------|-----------------|-----------|
 | **Apps**                                  | 76.86         | 85.38         | 26.52           | 93.62           | 98.08     |
 | **CosQA**                                 | 40.47         | 42.47         | 29.79           | 34.45           | 46.72     |
 | **Text2SQL**                              | 78.42         | 78.94         | 69.26           | 62.87           | 64.35     |
 | **CSN**                                   | 87.87         | 89.67         | 81.79           | 89.35           | 89.53     |
 | **CSN-CCR**                               | 97.66         | 97.95         | 73.45           | 90.05           | 98.30     |
 | **CodeTrans-Contest**                     | 90.30         | 94.45         | 72.77           | 94.96           | 94.38     |
 | **CodeTrans-DL**                          | 38.57         | 40.46         | 27.48           | 38.57           | 46.13     |
 | **StackOverFlow-QA**                      | 94.47         | 96.33         | 67.68           | 97.17           | 95.35     |
 | **CodeFeedBack-ST**                       | 86.36         | 87.53         | 65.35           | 90.67           | 90.56     |
 | **CodeFeedBack-MT**                       | 65.51         | 68.83         | 28.74           | 93.58           | 94.38     |
 | **AVG**                                   | **75.65**         | **78.20**         | **56.26**           | **78.53**    | **81.77**     |
 - CodeRAG
 |                 | HumanEval  | MBPP | DS-1000 | ODEX | RepoEval | SWE-bench-Lite | AVG  |
 | --------------- | ---------- | ---- | ------- | ---- | -------- | -------------- | ---- |
 | SFR             | 100.0      | 99.0 | 19.3    | 37.1 | 83.8     | 62.7           | **67.0** |
 | Jina-v2-code    | 100.0      | 97.7 | 26.2    | 19.9 | 90.5     | 58.3           | **65.4** |
 | CodeXEmbed-2B   | 100.0      | 97.4 | 25.4    | 23.9 | 88.7     | 52.4           | **64.6** |
 | Voyage-Code-002 | 100.0      | 99.0 | 33.1    | 26.6 | 94.3     | 29.1           | **63.7** |
 | BGE-Code-v1       | 100.0      | 99.2 | 40.9    | 36.1 | 93.1     | 67.4           | **72.8** |
 ### Instructions for Evaluation
 ```python
 {
    "Apps": "Given a code contest problem description, retrieve relevant code that can help solve the problem.",
    "CosQA": "Given a web search query, retrieve relevant code that can help answer the query.",
    "Text2SQL": "Given a question in text, retrieve SQL queries that are appropriate responses to the question.",
    "CSN": "Given a piece of code, retrieve the document string that summarizes the code.",
    "CSN-CCR": "Given a piece of code segment, retrieve the code segment that is the latter part of the code.",
    "CodeTrans-DL": "Given a piece of code, retrieve code that is semantically equivalent to the input code.",
    "CodeTrans-Contest": "Given a piece of Python code, retrieve C++ code that is semantically equivalent to the input code.",
    "StackOverFlow-QA": "Given a question that consists of a mix of text and code snippets, retrieve relevant answers that also consist of a mix of text and code snippets, and can help answer the question.",
    "CodeFeedBack-ST": "Given a question that consists of a mix of text and code snippets, retrieve relevant answers that also consist of a mix of text and code snippets, and can help answer the question.",
    "CodeFeedBack-MT": "Given a multi-turn conversation history that consists of a mix of text and code snippets, retrieve relevant answers that also consist of a mix of text and code snippets, and can help answer the question.",
    "HumanEval": "Given a question that consists of a mix of text and code snippets, retrieve relevant answers that also consist of a mix of text and code snippets, and can help answer the question.",
    "MBPP": "Given a textual explanation of code functionality, retrieve the corresponding code implementation.",
    "DS-1000": "Given a question that consists of a mix of text and code snippets, retrieve relevant answers that also consist of a mix of text and code snippets, and can help answer the question.",
    "ODEX": "Given a question, retrieve relevant answers that also consist of a mix of text and code snippets, and can help answer the question.",
    "RepoEval": "Given a piece of code segment, retrieve the code segment that is the latter part of the code.",
    "SWE-bench-Lite": "Given a code snippet containing a bug and a natural language description of the bug or error, retrieve code snippets that demonstrate solutions or fixes for similar bugs or errors (the desired documents)."
 }
 ```
 ## Citation
 If you find this repository useful, please consider giving it a star :star: and citing our work:
 ```
@misc{bge_code,
    title={Towards A Generalist Code Embedding Model Based On Massive Data Synthesis}, 
    author={Chaofan Li and Jianlyu Chen and Yingxia Shao and Defu Lian and Zheng Liu},
    year={2025},
    eprint={2505.12697},
    archivePrefix={arXiv},
    primaryClass={cs.IR},
    url={https://arxiv.org/abs/2505.12697}, 
 }
 ```
--- a/added_tokens.json
+++ b/added_tokens.json
@@ -0,0 +1,26 @@
 {
  "</tool_call>": 151658,
  "<instruct>": 151665,
  "<query>": 151666,
  "<tool_call>": 151657,
  "<|box_end|>": 151649,
  "<|box_start|>": 151648,
  "<|endoftext|>": 151643,
  "<|file_sep|>": 151664,
  "<|fim_middle|>": 151660,
  "<|fim_pad|>": 151662,
  "<|fim_prefix|>": 151659,
  "<|fim_suffix|>": 151661,
  "<|im_end|>": 151645,
  "<|im_start|>": 151644,
  "<|image_pad|>": 151655,
  "<|object_ref_end|>": 151647,
  "<|object_ref_start|>": 151646,
  "<|quad_end|>": 151651,
  "<|quad_start|>": 151650,
  "<|repo_name|>": 151663,
  "<|video_pad|>": 151656,
  "<|vision_end|>": 151653,
  "<|vision_pad|>": 151654,
  "<|vision_start|>": 151652
 }
--- a/config.json
+++ b/config.json
@@ -0,0 +1,29 @@
 {
  "_name_or_path": "bge-code-v1",
  "architectures": [
    "Qwen2Model"
  ],
  "attention_dropout": 0.0,
  "bos_token_id": 151643,
  "eos_token_id": 151643,
  "hidden_act": "silu",
  "hidden_size": 1536,
  "initializer_range": 0.02,
  "intermediate_size": 8960,
  "max_position_embeddings": 32768,
  "max_window_layers": 28,
  "model_type": "qwen2",
  "num_attention_heads": 12,
  "num_hidden_layers": 28,
  "num_key_value_heads": 2,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": true,
  "torch_dtype": "float32",
  "transformers_version": "4.49.0",
  "use_cache": false,
  "use_sliding_window": false,
  "vocab_size": 151667
 }
--- a/config_sentence_transformers.json
+++ b/config_sentence_transformers.json
@@ -0,0 +1,10 @@
 {
  "__version__": {
    "sentence_transformers": "3.4.1",
    "transformers": "4.49.0",
    "pytorch": "2.5.1+cu124"
  },
  "prompts": {},
  "default_prompt_name": null,
  "similarity_fn_name": "cosine"
 }
--- a/merges.txt
+++ b/merges.txt
--- a/model-00001-of-00002.safetensors
+++ b/model-00001-of-00002.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:3f0012a815833b137559c60485e3d087fb029e034b7b5e5c18fa9cabcc3faafc
 size 4995016160
--- a/model-00002-of-00002.safetensors
+++ b/model-00002-of-00002.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:93d0d51780cad746744bed246b4c13e520e458ba36509c33aff1ed0c9ac16d71
 size 1178224504
--- a/model.safetensors.index.json
+++ b/model.safetensors.index.json
@@ -0,0 +1,345 @@
 {
  "metadata": {
    "total_size": 6173204480
  },
  "weight_map": {
    "embed_tokens.weight": "model-00001-of-00002.safetensors",
    "layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.0.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.0.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.0.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.0.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.0.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.1.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.1.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.1.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.1.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.1.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.10.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.10.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.10.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.10.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.10.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.11.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.11.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.11.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.11.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.11.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.12.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.12.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.12.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.12.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.12.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.13.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.13.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.13.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.13.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.13.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.14.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.14.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.14.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.14.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.14.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.15.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.15.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.15.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.15.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.15.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.16.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.16.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.16.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.16.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.16.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.17.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.17.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.17.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.17.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.17.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.18.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.18.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.18.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.18.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.18.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.19.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.19.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.19.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.19.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.19.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.2.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.2.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.2.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.2.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.2.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.20.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.20.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.20.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.20.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.20.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.21.input_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.21.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
    "layers.21.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.21.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.21.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.21.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.22.input_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.22.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
    "layers.22.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
    "layers.22.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
    "layers.22.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.22.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
    "layers.22.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
    "layers.22.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
    "layers.22.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
    "layers.22.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
    "layers.22.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
    "layers.22.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
    "layers.23.input_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.23.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
    "layers.23.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
    "layers.23.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
    "layers.23.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.23.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
    "layers.23.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
    "layers.23.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
    "layers.23.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
    "layers.23.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
    "layers.23.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
    "layers.23.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
    "layers.24.input_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.24.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
    "layers.24.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
    "layers.24.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
    "layers.24.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.24.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
    "layers.24.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
    "layers.24.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
    "layers.24.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
    "layers.24.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
    "layers.24.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
    "layers.24.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
    "layers.25.input_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.25.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
    "layers.25.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
    "layers.25.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
    "layers.25.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.25.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
    "layers.25.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
    "layers.25.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
    "layers.25.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
    "layers.25.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
    "layers.25.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
    "layers.25.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
    "layers.26.input_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.26.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
    "layers.26.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
    "layers.26.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
    "layers.26.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.26.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
    "layers.26.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
    "layers.26.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
    "layers.26.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
    "layers.26.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
    "layers.26.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
    "layers.26.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
    "layers.27.input_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.27.mlp.down_proj.weight": "model-00002-of-00002.safetensors",
    "layers.27.mlp.gate_proj.weight": "model-00002-of-00002.safetensors",
    "layers.27.mlp.up_proj.weight": "model-00002-of-00002.safetensors",
    "layers.27.post_attention_layernorm.weight": "model-00002-of-00002.safetensors",
    "layers.27.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
    "layers.27.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
    "layers.27.self_attn.o_proj.weight": "model-00002-of-00002.safetensors",
    "layers.27.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
    "layers.27.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
    "layers.27.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
    "layers.27.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
    "layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.3.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.3.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.3.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.3.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.3.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.4.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.4.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.4.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.4.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.4.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.5.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.5.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.5.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.5.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.5.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.6.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.6.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.6.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.6.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.6.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.7.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.7.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.7.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.7.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.7.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.8.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.8.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.8.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.8.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.8.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.9.mlp.down_proj.weight": "model-00001-of-00002.safetensors",
    "layers.9.mlp.gate_proj.weight": "model-00001-of-00002.safetensors",
    "layers.9.mlp.up_proj.weight": "model-00001-of-00002.safetensors",
    "layers.9.post_attention_layernorm.weight": "model-00001-of-00002.safetensors",
    "layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
    "layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
    "layers.9.self_attn.o_proj.weight": "model-00001-of-00002.safetensors",
    "layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
    "layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
    "layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
    "layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
    "norm.weight": "model-00002-of-00002.safetensors"
  }
 }
--- a/modules.json
+++ b/modules.json
@@ -0,0 +1,20 @@
 [
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
 ]
--- a/sentence_bert_config.json
+++ b/sentence_bert_config.json
@@ -0,0 +1,4 @@
 {
  "max_seq_length": 32768,
  "do_lower_case": false
 }
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,20 @@
 {
  "additional_special_tokens": [
    "<instruct>",
    "<query>"
  ],
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/tokenization_qwen.py
+++ b/tokenization_qwen.py
@@ -0,0 +1,250 @@
 """
 Copied from https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct/blob/main/tokenization_qwen.py
 """
 from typing import List, Optional
 from transformers.models.qwen2.tokenization_qwen2 import Qwen2Tokenizer as OriginalQwen2Tokenizer
 from transformers.models.qwen2.tokenization_qwen2_fast import Qwen2TokenizerFast as OriginalQwen2TokenizerFast
 from tokenizers import processors
 # Maps each tokenizer asset kind to the file name it is stored under.
 VOCAB_FILES_NAMES = dict(
    vocab_file="vocab.json",
    merges_file="merges.txt",
    tokenizer_file="tokenizer.json",
 )
 class Qwen2Tokenizer(OriginalQwen2Tokenizer):
    """
    Slow Qwen2 tokenizer (byte-level Byte-Pair-Encoding) that can append the EOS token to every sequence.
    Like GPT2Tokenizer, spaces are treated as part of the tokens, so a word is encoded differently depending on
    whether it is preceded by a space:
    ```python
    >>> from transformers import Qwen2Tokenizer
    >>> tokenizer = Qwen2Tokenizer.from_pretrained("Qwen/Qwen-tokenizer")
    >>> tokenizer("Hello world")["input_ids"]
    [9707, 1879]
    >>> tokenizer(" Hello world")["input_ids"]
    [21927, 1879]
    ```
    This is expected. GPT2Tokenizer is not a drop-in replacement because its pretokenization rules differ.
    Everything except the `add_eos_token` handling is inherited from the upstream Qwen2 tokenizer; see that class
    (and [`PreTrainedTokenizer`]) for the remaining methods.
    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        merges_file (`str`):
            Path to the merges file.
        errors (`str`, *optional*, defaults to `"replace"`):
            Error-handling scheme used when decoding bytes to UTF-8. See
            [bytes.decode](https://docs.python.org/3/library/stdtypes.html#bytes.decode).
        unk_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            Token substituted for anything that cannot be mapped to an ID.
        bos_token (`str`, *optional*):
            Beginning-of-sequence token. Not applicable for this tokenizer.
        eos_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            End-of-sequence token.
        pad_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            Token used to pad sequences of different lengths in a batch.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether to clean up spaces introduced while splitting the input text. Not applicable to this
            tokenizer, since tokenization does not add spaces.
        split_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether special tokens are split like ordinary text. With the default `False`,
            `tokenizer.tokenize("<|endoftext|>")` yields `['<|endoftext|>']`; with `True` it yields
            `['<', '|', 'endo', 'ft', 'ext', '|', '>']`. Only supported by `slow` tokenizers for the moment.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether to append the `eos_token` at the end of each sequence.
    """
    def __init__(
        self,
        vocab_file,
        merges_file,
        errors="replace",
        unk_token="<|endoftext|>",
        bos_token=None,
        eos_token="<|endoftext|>",
        pad_token="<|endoftext|>",
        clean_up_tokenization_spaces=False,
        split_special_tokens=False,
        add_eos_token=False,
        **kwargs,
    ):
        # Stored before the parent constructor runs (approach borrowed from LlamaTokenizer).
        self.add_eos_token = add_eos_token
        forwarded = dict(
            vocab_file=vocab_file,
            merges_file=merges_file,
            errors=errors,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            pad_token=pad_token,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            split_special_tokens=split_special_tokens,
            add_eos_token=add_eos_token,
        )
        super().__init__(**forwarded, **kwargs)
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Return `token_ids_0` (and `token_ids_1`, when given) concatenated, each followed by the EOS id when
        `add_eos_token` is enabled. Without `add_eos_token` the sequences are joined unchanged.
        """
        eos_suffix = [self.eos_token_id] if self.add_eos_token else []
        if token_ids_1 is None:
            return token_ids_0 + eos_suffix
        return token_ids_0 + eos_suffix + token_ids_1 + eos_suffix
    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Build a mask that flags the special-token positions of the sequence(s).
        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                When `True`, the computation is delegated to the parent class.
        Returns:
            `List[int]`: One entry per output position: 1 for the EOS slot appended after each sequence (only
            when `add_eos_token` is enabled), 0 for every regular token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )
        eos_flag = [1] if self.add_eos_token else []
        mask = [0] * len(token_ids_0) + eos_flag
        if token_ids_1 is not None:
            mask += [0] * len(token_ids_1) + eos_flag
        return mask
    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Build token type IDs for a single sequence or a sequence pair:
        ```
        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
        | first sequence    | second sequence |
        ```
        Each segment is one position longer when `add_eos_token` is enabled, to cover the appended EOS id.
        If `token_ids_1` is None, only the first portion (0s) is returned.
        Args:
            token_ids_0 (`List[int]`):
                List of ids.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
        Returns:
            `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
        """
        eos_suffix = [self.eos_token_id] if self.add_eos_token else []
        type_ids = [0] * len(token_ids_0 + eos_suffix)
        if token_ids_1 is None:
            return type_ids
        return type_ids + [1] * len(token_ids_1 + eos_suffix)
 class Qwen2TokenizerFast(OriginalQwen2TokenizerFast):
    """
    Construct a "fast" Qwen2 tokenizer (backed by HuggingFace's *tokenizers* library). Based on byte-level
    Byte-Pair-Encoding, with optional appending of the EOS token through a template post-processor.
    Same with GPT2Tokenizer, this tokenizer has been trained to treat spaces like parts of the tokens so a word will
    be encoded differently whether it is at the beginning of the sentence (without space) or not:
    ```python
    >>> from transformers import Qwen2TokenizerFast
    >>> tokenizer = Qwen2TokenizerFast.from_pretrained("Qwen/Qwen-tokenizer")
    >>> tokenizer("Hello world")["input_ids"]
    [9707, 1879]
    >>> tokenizer(" Hello world")["input_ids"]
    [21927, 1879]
    ```
    This is expected.
    This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods.
    Args:
        vocab_file (`str`, *optional*):
            Path to the vocabulary file.
        merges_file (`str`, *optional*):
            Path to the merges file.
        tokenizer_file (`str`, *optional*):
            Path to [tokenizers](https://github.com/huggingface/tokenizers) file (generally has a .json extension) that
            contains everything needed to load the tokenizer.
        unk_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead. Not applicable to this tokenizer.
        bos_token (`str`, *optional*):
            The beginning of sequence token. Not applicable for this tokenizer.
        eos_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            The end of sequence token.
        pad_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            The token used for padding, for example when batching sequences of different lengths.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether or not to add an `eos_token` at the end of sequences. Can also be changed after construction
            by assigning to the `add_eos_token` attribute, which rebuilds the post processor.
    """
    slow_tokenizer_class = Qwen2Tokenizer
    padding_side = "left"
    def __init__(
        self,
        vocab_file=None,
        merges_file=None,
        tokenizer_file=None,
        unk_token="<|endoftext|>",
        bos_token=None,
        eos_token="<|endoftext|>",
        pad_token="<|endoftext|>",
        add_eos_token=False,
        **kwargs,
    ):
        # `add_eos_token` is forwarded to the parent constructor, exactly as the slow tokenizer in this module
        # does, so the flag travels with the other init kwargs instead of being known only to this subclass.
        super().__init__(
            vocab_file=vocab_file,
            merges_file=merges_file,
            tokenizer_file=tokenizer_file,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            pad_token=pad_token,
            add_eos_token=add_eos_token,
            **kwargs,
        )
        # Assign the private field directly; the post processor is rebuilt once, on the next line.
        self._add_eos_token = add_eos_token
        self.update_post_processor()
    def update_post_processor(self):
        """
        Updates the underlying post processor with the current `eos_token`.
        With `add_eos_token` enabled, the template appends the EOS token after each sequence (type id 0 for the
        first sequence, 1 for the second); otherwise the sequences pass through without added tokens.
        Raises:
            ValueError: If `add_eos_token` is enabled but `eos_token` is `None`.
        """
        eos = self.eos_token
        eos_token_id = self.eos_token_id
        if self.add_eos_token:
            if eos is None:
                raise ValueError("add_eos_token = True but eos_token = None")
            single = f"$A:0 {eos}:0"
            pair = f"{single} $B:1 {eos}:1"
            # The template refers to the EOS token by name, so its id must be registered with the processor.
            special_tokens = [(eos, eos_token_id)]
        else:
            single = "$A:0"
            pair = "$A:0 $B:1"
            special_tokens = []
        self._tokenizer.post_processor = processors.TemplateProcessing(
            single=single, pair=pair, special_tokens=special_tokens
        )
    @property
    def add_eos_token(self):
        """Whether the EOS token is appended at the end of sequences."""
        return self._add_eos_token
    @add_eos_token.setter
    def add_eos_token(self, value):
        # Without a setter the attribute was read-only and the flag could not be toggled after construction.
        # Rebuild the post processor so the new value takes effect immediately.
        self._add_eos_token = value
        self.update_post_processor()
--- a/tokenizer.json
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:a56524092f5d0676e63537511b535e73e7580a7efe440247ef3fa43d019a0af0
 size 11422261
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,220 @@
 {
  "add_bos_token": false,
  "add_eos_token": true,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "151643": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151644": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151645": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151646": {
      "content": "<|object_ref_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151647": {
      "content": "<|object_ref_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151648": {
      "content": "<|box_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151649": {
      "content": "<|box_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151650": {
      "content": "<|quad_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151651": {
      "content": "<|quad_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151652": {
      "content": "<|vision_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151653": {
      "content": "<|vision_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151654": {
      "content": "<|vision_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151655": {
      "content": "<|image_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151656": {
      "content": "<|video_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151657": {
      "content": "<tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151658": {
      "content": "</tool_call>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151659": {
      "content": "<|fim_prefix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151660": {
      "content": "<|fim_middle|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151661": {
      "content": "<|fim_suffix|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151662": {
      "content": "<|fim_pad|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151663": {
      "content": "<|repo_name|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151664": {
      "content": "<|file_sep|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": false
    },
    "151665": {
      "content": "<instruct>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151666": {
      "content": "<query>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<instruct>",
    "<query>"
  ],
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_qwen.Qwen2Tokenizer",
      null
    ]
  },
  "bos_token": null,
  "chat_template": "{%- if tools %}\n    {{- '<|im_start|>system\\n' }}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- messages[0]['content'] }}\n    {%- else %}\n        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n    {%- endif %}\n    {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n    {%- for tool in tools %}\n        {{- \"\\n\" }}\n        {{- tool | tojson }}\n    {%- endfor %}\n    {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n    {%- if messages[0]['role'] == 'system' %}\n        {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n    {%- else %}\n        {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\\n' }}\n    {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n    {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n        {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n    {%- elif message.role == \"assistant\" %}\n        {{- '<|im_start|>' + message.role }}\n        {%- if message.content %}\n            {{- '\\n' + message.content }}\n        {%- endif %}\n        {%- for tool_call in message.tool_calls %}\n            {%- if tool_call.function is defined %}\n                {%- set tool_call = tool_call.function %}\n            {%- endif %}\n            {{- '\\n<tool_call>\\n{\"name\": \"' }}\n            {{- tool_call.name }}\n            {{- '\", \"arguments\": ' }}\n            {{- tool_call.arguments | tojson }}\n            {{- '}\\n</tool_call>' }}\n        {%- endfor %}\n        {{- '<|im_end|>\\n' }}\n    {%- elif message.role == \"tool\" %}\n        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n            {{- '<|im_start|>user' }}\n        {%- endif %}\n        {{- '\\n<tool_response>\\n' }}\n        {{- message.content }}\n        {{- '\\n</tool_response>' }}\n        {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n            {{- '<|im_end|>\\n' }}\n        {%- endif %}\n    {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n    {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 256,
  "pad_token": "<|endoftext|>",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
 }
--- a/vocab.json
+++ b/vocab.json