初始化项目，由ModelHub XC社区提供模型

Model: infly/inf-retriever-v1-1.5b Source: Original Platform
2026-05-14 14:07:39 +08:00
commit f44886b1e0
16 changed files with 456656 additions and 0 deletions
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,35 @@
 *.7z filter=lfs diff=lfs merge=lfs -text
 *.arrow filter=lfs diff=lfs merge=lfs -text
 *.bin filter=lfs diff=lfs merge=lfs -text
 *.bz2 filter=lfs diff=lfs merge=lfs -text
 *.ckpt filter=lfs diff=lfs merge=lfs -text
 *.ftz filter=lfs diff=lfs merge=lfs -text
 *.gz filter=lfs diff=lfs merge=lfs -text
 *.h5 filter=lfs diff=lfs merge=lfs -text
 *.joblib filter=lfs diff=lfs merge=lfs -text
 *.lfs.* filter=lfs diff=lfs merge=lfs -text
 *.mlmodel filter=lfs diff=lfs merge=lfs -text
 *.model filter=lfs diff=lfs merge=lfs -text
 *.msgpack filter=lfs diff=lfs merge=lfs -text
 *.npy filter=lfs diff=lfs merge=lfs -text
 *.npz filter=lfs diff=lfs merge=lfs -text
 *.onnx filter=lfs diff=lfs merge=lfs -text
 *.ot filter=lfs diff=lfs merge=lfs -text
 *.parquet filter=lfs diff=lfs merge=lfs -text
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pickle filter=lfs diff=lfs merge=lfs -text
 *.pkl filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
 *.rar filter=lfs diff=lfs merge=lfs -text
 *.safetensors filter=lfs diff=lfs merge=lfs -text
 saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.tar.* filter=lfs diff=lfs merge=lfs -text
 *.tar filter=lfs diff=lfs merge=lfs -text
 *.tflite filter=lfs diff=lfs merge=lfs -text
 *.tgz filter=lfs diff=lfs merge=lfs -text
 *.wasm filter=lfs diff=lfs merge=lfs -text
 *.xz filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
--- a/1_Pooling/config.json
+++ b/1_Pooling/config.json
@@ -0,0 +1,10 @@
 {
  "word_embedding_dimension": 1536,
  "pooling_mode_cls_token": false,
  "pooling_mode_mean_tokens": false,
  "pooling_mode_max_tokens": false,
  "pooling_mode_mean_sqrt_len_tokens": false,
  "pooling_mode_weightedmean_tokens": false,
  "pooling_mode_lasttoken": true,
  "include_prompt": true
 }
--- a/README.md
+++ b/README.md
@@ -0,0 +1,179 @@
 ---
 base_model:
 - Alibaba-NLP/gte-Qwen2-1.5B-instruct
 language:
 - en
 - zh
 license: apache-2.0
 tags:
 - sentence-transformers
 - transformers
 - sentence-similarity
 ---
 # INF-Retriever-v1-1.5B
 ## Model Overview
 - **INF-Retriever-v1-1.5B** is a lightweight version of [**INF-Retriever-v1**](https://huggingface.co/infly/inf-retriever-v1), an LLM-based dense retrieval model developed by INF TECH. 
 It is built upon the [gte-Qwen2-1.5B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct) model and specifically fine-tuned to excel in retrieval tasks, particularly for Chinese and English data. 
 - As of February 19, 2025, **INF-Retriever-v1-1.5B** ranks **No.1** on the bilingual Chinese and English sub-leaderboard of both versions 24.04 and 24.05 of the Automated Heterogeneous Information Retrieval Benchmark ([AIR-Bench](https://huggingface.co/spaces/AIR-Bench/leaderboard)), among models with fewer than 7B parameters. This demonstrates its cutting-edge performance in heterogeneous information retrieval tasks.
 ## Key Features
 - **Optimized for Chinese and English retrieval**: The model has been specifically fine-tuned with retrieval-focused datasets in both languages, significantly improving its accuracy and efficiency for a variety of retrieval scenarios.
 - **Top-tier performance**: **INF-Retriever-v1-1.5B** has achieved outstanding results on the AIR-Bench leaderboard, making it a top choice for heterogeneous information retrieval tasks across various domains.
 ## Model Details
 - Model Size: 1.5B
 - Embedding Dimension: 1536
 - Max Input Tokens: 32768
 - Language Support: Chinese & English (also effective in other languages)
 ## Usage
 ### Sentence Transformers
 ```python
 from sentence_transformers import SentenceTransformer
 # Load the model. The repository ships custom model/tokenizer code (see `auto_map` in
 # config.json and tokenizer_config.json), hence trust_remote_code=True.
 model = SentenceTransformer("infly/inf-retriever-v1-1.5b", trust_remote_code=True)
 # In case you want to reduce the maximum length:
 # (sentence_bert_config.json sets max_seq_length to 32768 by default)
 model.max_seq_length = 8192
 # Raw query texts; the instruction prefix is added by the "query" prompt below
 queries = [
    "how much protein should a female eat",
    "summit define",
 ]
 # Candidate passages to be ranked against each query
 documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments.",
 ]
 # Queries use the "query" prompt defined in config_sentence_transformers.json;
 # documents are encoded without a prompt (default_prompt_name is null there).
 query_embeddings = model.encode(queries, prompt_name="query")
 document_embeddings = model.encode(documents)
 # Rows are queries, columns are documents. modules.json ends the pipeline with a
 # Normalize module, so presumably this dot product is a cosine similarity scaled
 # by 100 — TODO confirm.
 scores = (query_embeddings @ document_embeddings.T) * 100
 print(scores.tolist())
 # [[89.36092376708984, 69.16694641113281], [57.51953125, 79.65923309326172]]
 ```
 ### Transformers
 ```python
 import torch
 import torch.nn.functional as F
 from torch import Tensor
 from transformers import AutoTokenizer, AutoModel
 def last_token_pool(last_hidden_states: Tensor, attention_mask: Tensor) -> Tensor:
    """Select, for every row, the hidden state at the last attended position.
    Args:
        last_hidden_states: Tensor indexed as [row, position, ...].
        attention_mask: Tensor indexed as [row, position]; non-zero marks real tokens.
    Returns:
        One hidden-state vector per row.
    """
    # When the final column of the mask is set for every row, the batch is
    # left-padded (or unpadded) and the last position is always a real token.
    every_row_ends_unpadded = attention_mask[:, -1].sum() == attention_mask.shape[0]
    if every_row_ends_unpadded:
        return last_hidden_states[:, -1]
    # Otherwise the last real token of each row sits at (number of attended tokens - 1).
    last_real_index = attention_mask.sum(dim=1) - 1
    row_ids = torch.arange(last_hidden_states.shape[0], device=last_hidden_states.device)
    return last_hidden_states[row_ids, last_real_index]
 def get_detailed_instruct(task_description: str, query: str) -> str:
    """Prefix a query with its task instruction in the `Instruct: ...\\nQuery: ...` layout."""
    template = 'Instruct: {}\nQuery: {}'
    return template.format(task_description, query)
 # Each query must come with a one-sentence instruction that describes the task
 task = 'Given a web search query, retrieve relevant passages that answer the query'
 queries = [
    get_detailed_instruct(task, 'how much protein should a female eat'),
    get_detailed_instruct(task, 'summit define')
 ]
 # No need to add instruction for retrieval documents
 documents = [
    "As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.",
    "Definition of summit for English Language Learners. : 1  the highest point of a mountain : the top of a mountain. : 2  the highest level. : 3  a meeting or series of meetings between the leaders of two or more governments."
 ]
 # Queries first, then documents: the score computation below relies on this order
 input_texts = queries + documents
 tokenizer = AutoTokenizer.from_pretrained('infly/inf-retriever-v1-1.5b', trust_remote_code=True)
 model = AutoModel.from_pretrained('infly/inf-retriever-v1-1.5b', trust_remote_code=True)
 max_length = 8192
 # Tokenize the input texts
 batch_dict = tokenizer(input_texts, max_length=max_length, padding=True, truncation=True, return_tensors='pt')
 # Inference only: disable autograd so no activations are kept for a backward pass
 with torch.no_grad():
    outputs = model(**batch_dict)
 # Pool each sequence down to the hidden state of its last attended token
 embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
 # normalize embeddings
 embeddings = F.normalize(embeddings, p=2, dim=1)
 # First two rows are the queries, the remaining rows are the documents
 scores = (embeddings[:2] @ embeddings[2:].T) * 100
 print(scores.tolist())
 # [[89.36091613769531, 69.16694641113281], [57.519447326660156, 79.65917205810547]]
 ```
 ## Evaluation
 ### AIR-Bench
 **INF-Retriever-v1-1.5B** has demonstrated superior retrieval capabilities across multiple domains and languages. The results from the Automated Heterogeneous Information Retrieval Benchmark ([AIR-Bench](https://huggingface.co/spaces/AIR-Bench/leaderboard)) as of February 19, 2025, are as follows:
 #### AIR-Bench_24.04 (Bilingual, EN & ZH)
 | Model Name                                                                            | Average⬆️ | wiki_en   | wiki_zh   | web_en    | web_zh   | healthcare_en | healthcare_zh | law_en    | arxiv_en  | news_en   | news_zh   | finance_en | finance_zh | msmarco_en | 
 |---------------------------------------------------------------------------------------|-----------|-----------|-----------|-----------|----------|---------------|---------------|-----------|-----------|-----------|-----------|------------|------------|------------|
 | [INF-Retriever-v1](https://huggingface.co/infly/inf-retriever-v1)                     | **52.56** | **65.25** | **68.44** | **52.13** | **56.6** | **56.96**     | 42.03         | **34.51** | **50.62** | 53.32     | **50.02** | **58.34**  | **35.42**  | 59.64      |
 | **INF-Retriever-v1-1.5B**                                                             | 49.77     | 62.87     | 65.98     | 50.16     | 53.8     | 54.48         | 40.22         | 32        | 45.3      | 51.47     | 46.02     | 56.81      | 31.15      | 56.73      |
 | [GTE-Qwen2-7B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct)     | 48.38     | 63.46     | 66.44     | 51.2      | 51.98    | 54.2          | 38.82         | 22.31     | 40.27     | **54.07** | 43.03     | 58.2       | 26.63      | 58.39      |
 | [BGE-Multilingual-Gemma2](https://huggingface.co/BAAI/bge-multilingual-gemma2)        | 46.83     | 63.71     | 67.3      | 50.38     | 53.24    | 47.24         | 42.13         | 22.58     | 23.28     | 50.91     | 44.02     | 49.3       | 31.6       | **63.14**  |
 | [BGE-M3](https://huggingface.co/BAAI/bge-m3)                                          | 46.65     | 60.49     | 62.36     | 47.35     | 50.38    | 49.1          | **42.38**     | 26.68     | 40.76     | 48.04     | 40.75     | 51.52      | 32.18      | 54.4       |
 | [E5-mistral-7b-instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct)      | 45.26     | 61.67     | 55.97     | 44.41     | 45.96    | 56.32         | 35.79         | 19.32     | 44.78     | 48.18     | 35.99     | 54.79      | 26.11      | 59.03      |
 | [Multilingual-E5-large](https://huggingface.co/intfloat/multilingual-e5-large)        | 42.58     | 53.76     | 60.57     | 37.55     | 48.27    | 50.63         | 33.74         | 19.66     | 36.93     | 43.5      | 39.72     | 47.77      | 26.98      | 54.44      |
 | [GTE-Qwen1.5-7B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen1.5-7B-instruct) | 41.61     | 57.05     | 52.89     | 43.17     | 44.9     | 54.44         | 37.42         | 11.85     | 32.31     | 50.07     | 24.19     | 55.16      | 26.09      | 51.35      |
 #### AIR-Bench_24.05 (Multilingual, 13 languages)
 ##### Bilingual (EN & ZH)
 | Model Name                                                                            | Average⬆️ | wiki_en   | wiki_zh   | web_en    | web_zh    | healthcare_en | healthcare_zh | law_en    | arxiv_en  | news_en   | news_zh   | finance_en | finance_zh |
 |---------------------------------------------------------------------------------------|-----------|-----------|-----------|-----------|-----------|---------------|---------------|-----------|-----------|-----------|-----------|------------|------------|
 | [INF-Retriever-v1](https://huggingface.co/infly/inf-retriever-v1)                     | **54.01** | 73.52     | **69.45** | 57.6      | **56.46** | **57.03**     | 41.82         | **34.76** | **51.38** | 52.7      | **49.78** | **59.44**  | **44.13**  |
 | **INF-Retriever-v1-1.5B**                                                             | 51.28     | 71.58     | 67.04     | 55.93     | 53.23     | 54.72         | 40.35         | 32.37     | 46.34     | 50.66     | 45.7      | 58.08      | 39.37      |
 | [GTE-Qwen2-7B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct)     | 49.89     | **73.59** | 67.5      | **58.99** | 51.66     | 54.46         | 38.66         | 22.75     | 41.32     | **52.74** | 43.17     | 59.23      | 34.61      |
 | [BGE-M3](https://huggingface.co/BAAI/bge-m3)                                          | 48.23     | 69.7      | 63.52     | 53.88     | 50.2      | 49.05         | 42.31         | 26.95     | 41.64     | 47.34     | 41        | 52.92      | 40.23      |
 | [BGE-Multilingual-Gemma2](https://huggingface.co/BAAI/bge-multilingual-gemma2)        | 47.53     | 72.8      | 68.64     | 56.48     | 53.04     | 47.48         | **42.35**     | 22.6      | 24        | 50.29     | 43.42     | 50.08      | 39.23      |
 | [E5-mistral-7b-instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct)      | 46.43     | 71.38     | 57.19     | 52.08     | 45.68     | 56.24         | 36.05         | 19.61     | 46.06     | 47.89     | 35.98     | 55.9       | 33.1       |
 | [GTE-Qwen1.5-7B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen1.5-7B-instruct) | 45.99     | 66.45     | 58.33     | 52.68     | 47.48     | 52.11         | 39.13         | 20.19     | 42.15     | 47.44     | 36.43     | 55.21      | 34.28      |
 | [GTE-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base)     | 45.14     | 69.12     | 61.86     | 52.05     | 46.75     | 47.48         | 37.94         | 11.44     | 41.28     | 47.54     | 36.2      | 53.24      | 36.84      |
 ##### Multilingual (13 languages)
 Although INF-Retriever-v1-1.5B has been fine-tuned exclusively on English and Chinese, it continues to perform exceptionally well across other languages.
 | Model Name                                                                                       | Average⬆️ | wiki_en   | wiki_zh   | wiki_ar   | wiki_bn   | wiki_de   | wiki_es   | wiki_fa   | wiki_fr   | wiki_hi   | wiki_id   | wiki_ja   | wiki_ko   | wiki_ru   | web_en    | web_zh    | web_ar    | web_bn    | web_de    | web_es   | web_fa    | web_fr    | web_hi   | web_id | web_ja    | web_ko    | web_ru    | healthcare_en | healthcare_zh | healthcare_de | healthcare_es | healthcare_fr | law_en    | law_de    | law_fr    | arxiv_en  | science_ru | news_en   | news_zh   | news_ar   | news_bn   | news_de   | news_es  | news_fa   | news_fr  | news_hi   | news_id   | news_ja   | news_ko   | news_ru   | finance_en | finance_zh | finance_ar | finance_fr |
 |--------------------------------------------------------------------------------------------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|----------|-----------|-----------|----------|--------|-----------|-----------|-----------|---------------|---------------|---------------|---------------|---------------|-----------|-----------|-----------|-----------|------------|-----------|-----------|-----------|-----------|-----------|----------|-----------|----------|-----------|-----------|-----------|-----------|-----------|------------|------------|------------|------------|
 | [INF-Retriever-v1](https://huggingface.co/infly/inf-retriever-v1)                                | **54.47** | 73.52     | **69.45** | 63.13     | 61.58     | 66.8      | 69.29     | 63.03     | 69.74     | 69.02     | 68.63     | 63.45     | 64.44     | 62.74     | 57.6      | **56.46** | 58.48     | 53.7      | 55.2      | 57.08    | 53.27     | 57.35     | 55.64    | 58.85  | 59.52     | **60.01** | 58.79     | **57.03**     | 41.82         | **55.46**     | 57.6          | 43.25         | **34.76** | **21.75** | **21.87** | **51.38** | **59.72**  | 52.7      | **49.78** | **49.11** | 43.62     | 51.47     | 49.52    | 40.43     | 54.54    | 38.57     | **51.06** | 51.12     | **53.15** | **51.88** | **59.44**  | **44.13**  | **50.71**  | 44.2       |
 | **INF-Retriever-v1-1.5B**                                                                        | 50        | 71.58     | 67.04     | 59.44     | 56.53     | 64.11     | 67.57     | 57.75     | 68.12     | 63.86     | 64.64     | 62.02     | 63.43     | 60.6      | 55.93     | 53.23     | 52.7      | 43.52     | 50.65     | 52.97    | 47.64     | 53.76     | 43.05    | 54.55  | 56.95     | 56.49     | 55.05     | 54.72         | 40.35         | 48.68         | 54.29         | 39.28         | 32.37     | 18.12     | 17.79     | 46.34     | 54.7       | 50.66     | 45.7      | 43.84     | 24.33     | 47.72     | 43.8     | 32.64     | 51.49    | 27.05     | 44.49     | 47.62     | 49.3      | 47.59     | 58.08      | 39.37      | 45.99      | 40.57      |
 | [BGE-Multilingual-Gemma2](https://huggingface.co/BAAI/bge-multilingual-gemma2)                   | 54.46     | 72.8      | 68.64     | **63.42** | **69.48** | **67.91** | **71.79** | **67.57** | **71.28** | **75.39** | **68.91** | **68.29** | **66.78** | **64.15** | 56.48     | 53.04     | **59.97** | **59.68** | **57.72** | **58.2** | **62.43** | **59.54** | **64.5** | **60** | **60.26** | 59.64     | **60.12** | 47.48         | **42.35**     | 55.4          | **63.13**     | **45.13**     | 22.6      | 15.75     | 14.29     | 24        | 44.13      | 50.29     | 43.42     | 48.41     | 58.77     | **52.05** | **49.9** | 43.4      | **56.8** | **44.89** | 50.65     | **51.51** | 51.64     | 51.48     | 50.08      | 39.23      | 50.25      | **51.1**   |
 | [BGE-M3](https://huggingface.co/BAAI/bge-m3)                                                     | 51.31     | 69.7      | 63.52     | 59.65     | 64.33     | 64.68     | 65.4      | 61.14     | 66.04     | 69.02     | 66.3      | 60.86     | 62.36     | 60.18     | 53.88     | 50.2      | 52.53     | 55.53     | 51.89     | 51.78    | 55.81     | 51.46     | 57.06    | 53.14  | 54.75     | 55.28     | 54.53     | 49.05         | 42.31         | 49            | 53.05         | 39.29         | 26.95     | 20.11     | 20.2      | 41.64     | 55.18      | 47.34     | 41        | 44.93     | 59.03     | 47.87     | 44.7     | 43.81     | 49.52    | 42.12     | 47.45     | 47.09     | 48.14     | 48.31     | 52.92      | 40.23      | 45.76      | 41.44      |
 | [Multilingual-E5-large-instruct](https://huggingface.co/intfloat/multilingual-e5-large-instruct) | 51.11     | 68.62     | 62.82     | 63.21     | 64.45     | 65.81     | 68.1      | 64.2      | 69.72     | 71.81     | 66.36     | 64.12     | 64.79     | 62.57     | 41.58     | 47.06     | 56.4      | 56.17     | 50.87     | 52.24    | 58.68     | 50.2      | 56.32    | 54.49  | 54.89     | 55.81     | 54.97     | 54.02         | 39.76         | 52.06         | 51.74         | 36.64         | 16.9      | 15.59     | 15.12     | 39.52     | 56.86      | 44.28     | 35.46     | 48.2      | 49.31     | 47.84     | 45.99    | **45.59** | 50.58    | 39.66     | 48.59     | 47.6      | 50.52     | 48.81     | 52.79      | 37.72      | 48.95      | 42.74      |
 | [GTE-Qwen2-7B-instruct](https://huggingface.co/Alibaba-NLP/gte-Qwen2-7B-instruct)                | 50.05     | **73.59** | 67.5      | 59.44     | 58.17     | 63.96     | 67.62     | 57.05     | 70.32     | 60.54     | 61.81     | 62.88     | 59.17     | 62.95     | **58.99** | 51.66     | 55.56     | 51.45     | 48.62     | 54.11    | 49.54     | 55.16     | 53.06    | 55.51  | 57.27     | 57.54     | 55.88     | 54.46         | 38.66         | 53.92         | 53.78         | 30.29         | 22.75     | 13.18     | 13.15     | 41.32     | 45.21      | **52.74** | 43.17     | 37.63     | **61.31** | 44.89     | 45.21    | 30.1      | 49.76    | 30.28     | 46.44     | 44.13     | 47.19     | 46.55     | 59.23      | 34.61      | 43.56      | 39.57      |
 | [jina-embeddings-v3](https://huggingface.co/jinaai/jina-embeddings-v3)                           | 48.46     | 64.96     | 62.7      | 57.89     | 62.81     | 62.08     | 63.65     | 57.75     | 64.67     | 68.74     | 62.75     | 58.26     | 58.28     | 59.41     | 47.38     | 47.66     | 53.4      | 55.55     | 48.06     | 49.42    | 52.84     | 48.8      | 58.79    | 52.76  | 50.1      | 51.87     | 50.51     | 49.42         | 38.92         | 49.86         | 52.75         | 32.68         | 16.78     | 11.71     | 9.76      | 39.65     | 50.24      | 45.61     | 40.56     | 44.04     | 53.73     | 46.39     | 42.94    | 37.9      | 46.56    | 40.02     | 44.86     | 41.96     | 45.18     | 46.65     | 51.7       | 33.96      | 46.32      | 37.14      |
 | [E5-mistral-7b-instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct)                 | 48.08     | 71.38     | 57.19     | 52.98     | 56.84     | 65.4      | 69.49     | 51.77     | 69.29     | 63.93     | 66.23     | 57.72     | 60.3      | 58.7      | 52.08     | 45.68     | 49.56     | 46.83     | 50.88     | 54.46    | 45.86     | 54.52     | 49.43    | 55.17  | 51.8      | 54.22     | 53.85     | 56.24         | 36.05         | 53.12         | 47.67         | 37.28         | 19.61     | 14.77     | 14.38     | 46.06     | 53.07      | 47.89     | 35.98     | 38.95     | 25.5      | 46.48     | 45.34    | 29.72     | 49.61    | 29.82     | 45.93     | 43.47     | 46.46     | 46.59     | 55.9       | 33.1       | 44.59      | 38.98      |
 ## Contributors
 ### Supervisors
 Wei Chu • Yinghui Xu • Yuan Qi
 ### INF memory team
 Junhan Yang • Jiahe Wan • Yichen Yao (eason.yyc@inftech.ai)
 ## Citation
 If you find our model useful, please consider citing:
 ```
@misc {infly-ai_2025,
 	author       = { Junhan Yang and Jiahe Wan and Yichen Yao and Wei Chu and Yinghui Xu and Emma Wang and Yuan Qi },
 	title        = { inf-retriever-v1 (Revision 5f469d7) },
 	year         = 2025,
 	url          = { https://huggingface.co/infly/inf-retriever-v1 },
 	doi          = { 10.57967/hf/4262 },
 	publisher    = { Hugging Face }
 }
 ```
--- a/added_tokens.json
+++ b/added_tokens.json
@@ -0,0 +1,5 @@
 {
  "<|endoftext|>": 151643,
  "<|im_end|>": 151645,
  "<|im_start|>": 151644
 }
--- a/config.json
+++ b/config.json
@@ -0,0 +1,34 @@
 {
  "architectures": [
    "Qwen2Model"
  ],
  "attention_dropout": 0.0,
  "auto_map": {
    "AutoModel": "modeling_qwen.Qwen2Model",
    "AutoModelForCausalLM": "modeling_qwen.Qwen2ForCausalLM",
    "AutoModelForSequenceClassification": "modeling_qwen.Qwen2ForSequenceClassification"
  },
  "bos_token_id": 151643,
  "eos_token_id": 151643,
  "hidden_act": "silu",
  "hidden_size": 1536,
  "initializer_range": 0.02,
  "intermediate_size": 8960,
  "is_causal": false,
  "max_position_embeddings": 131072,
  "max_window_layers": 21,
  "model_type": "qwen2",
  "num_attention_heads": 12,
  "num_hidden_layers": 28,
  "num_key_value_heads": 2,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 1000000.0,
  "sliding_window": null,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.48.1",
  "use_cache": true,
  "use_sliding_window": false,
  "vocab_size": 151646
 }
--- a/config_sentence_transformers.json
+++ b/config_sentence_transformers.json
@@ -0,0 +1,11 @@
 {
  "__version__": {
    "sentence_transformers": "2.7.0",
    "transformers": "4.39.3",
    "pytorch": "2.1.0+cu121"
  },
  "prompts": {
    "query": "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery: "
  },
  "default_prompt_name": null
 }
--- a/merges.txt
+++ b/merges.txt
--- a/model.safetensors
+++ b/model.safetensors
@@ -0,0 +1,3 @@
 version https://git-lfs.github.com/spec/v1
 oid sha256:19d559236e474d618fe407d417083c365227a7618f04381d71c178c99245278e
 size 3086574240
--- a/modeling_qwen.py
+++ b/modeling_qwen.py
--- a/modules.json
+++ b/modules.json
@@ -0,0 +1,20 @@
 [
  {
    "idx": 0,
    "name": "0",
    "path": "",
    "type": "sentence_transformers.models.Transformer"
  },
  {
    "idx": 1,
    "name": "1",
    "path": "1_Pooling",
    "type": "sentence_transformers.models.Pooling"
  },
  {
    "idx": 2,
    "name": "2",
    "path": "2_Normalize",
    "type": "sentence_transformers.models.Normalize"
  }
 ]
--- a/sentence_bert_config.json
+++ b/sentence_bert_config.json
@@ -0,0 +1,4 @@
 {
  "max_seq_length": 32768,
  "do_lower_case": false
 }
--- a/special_tokens_map.json
+++ b/special_tokens_map.json
@@ -0,0 +1,20 @@
 {
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>"
  ],
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
 }
--- a/tokenization_qwen.py
+++ b/tokenization_qwen.py
@@ -0,0 +1,267 @@
 from typing import List, Optional
 from transformers.models.qwen2.tokenization_qwen2 import Qwen2Tokenizer as OriginalQwen2Tokenizer
 from transformers.models.qwen2.tokenization_qwen2_fast import Qwen2TokenizerFast as OriginalQwen2TokenizerFast
 from tokenizers import processors
 # File names of the tokenizer resources (vocabulary, BPE merges, serialized fast tokenizer).
 # NOTE(review): not referenced by the classes visible in this module — presumably kept for
 # parity with the upstream Qwen2 tokenizer module; confirm before removing.
 VOCAB_FILES_NAMES = {
    "vocab_file": "vocab.json",
    "merges_file": "merges.txt",
    "tokenizer_file": "tokenizer.json",
 }
 class Qwen2Tokenizer(OriginalQwen2Tokenizer):
    """
    Slow Qwen2 tokenizer (byte-level Byte-Pair-Encoding) that can optionally append the
    `eos_token` to every encoded sequence.
    As with GPT2Tokenizer, spaces are treated as part of the tokens, so a word is encoded
    differently depending on whether it is preceded by a space:
    ```python
    >>> from transformers import Qwen2Tokenizer
    >>> tokenizer = Qwen2Tokenizer.from_pretrained("Qwen/Qwen-tokenizer")
    >>> tokenizer("Hello world")["input_ids"]
    [9707, 1879]
    >>> tokenizer(" Hello world")["input_ids"]
    [21927, 1879]
    ```
    This is expected.
    Do not substitute GPT2Tokenizer for this class: the pretokenization rules differ.
    This tokenizer inherits from [`PreTrainedTokenizer`], which provides most of the main
    methods; refer to that superclass for details about them.
    Args:
        vocab_file (`str`):
            Path to the vocabulary file.
        merges_file (`str`):
            Path to the merges file.
        errors (`str`, *optional*, defaults to `"replace"`):
            Paradigm to follow when decoding bytes to UTF-8. See
            [bytes.decode](https://docs.python.org/3/library/stdtypes.html#bytes.decode) for more information.
        unk_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is
            replaced by this token instead.
        bos_token (`str`, *optional*):
            The beginning of sequence token. Not applicable for this tokenizer.
        eos_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            The end of sequence token.
        pad_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            The token used for padding, for example when batching sequences of different lengths.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Whether or not to clean up the spaces that were added when splitting the input text during
            tokenization. Not applicable to this tokenizer, since tokenization does not add spaces.
        split_special_tokens (`bool`, *optional*, defaults to `False`):
            Whether or not special tokens should be split during tokenization. By default they are kept
            whole: if `<|endoftext|>` is the `eos_token`, then `tokenizer.tokenize("<|endoftext|>")` gives
            `['<|endoftext|>']`. With `split_special_tokens=True`, the same call gives
            `['<', '|', 'endo', 'ft', 'ext', '|', '>']`. This argument is only supported for `slow`
            tokenizers for the moment.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether or not to add an `eos_token` at the end of sequences.
    """
    def __init__(
        self,
        vocab_file,
        merges_file,
        errors="replace",
        unk_token="<|endoftext|>",
        bos_token=None,
        eos_token="<|endoftext|>",
        pad_token="<|endoftext|>",
        clean_up_tokenization_spaces=False,
        split_special_tokens=False,
        add_eos_token=False,
        **kwargs,
    ):
        # The add_eos_token handling was inspired by the LlamaTokenizer.
        # The flag is stored before delegating to the parent constructor.
        self.add_eos_token = add_eos_token
        super().__init__(
            vocab_file=vocab_file,
            merges_file=merges_file,
            errors=errors,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            pad_token=pad_token,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            split_special_tokens=split_special_tokens,
            add_eos_token=add_eos_token,
            **kwargs,
        )
    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """
        Append the EOS id after each given sequence when `add_eos_token` is enabled;
        otherwise return the sequence(s) concatenated unchanged.
        """
        eos_suffix = [self.eos_token_id] if self.add_eos_token else []
        if token_ids_1 is None:
            return token_ids_0 + eos_suffix
        return token_ids_0 + eos_suffix + token_ids_1 + eos_suffix
    def get_special_tokens_mask(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None, already_has_special_tokens: bool = False
    ) -> List[int]:
        """
        Build the special-token mask for one sequence or a pair of sequences.
        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.
                When `True`, the computation is delegated to the parent class.
        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0, token_ids_1=token_ids_1, already_has_special_tokens=True
            )
        # One `1` per appended EOS, nothing when EOS appending is disabled.
        eos_flag = [1] if self.add_eos_token else []
        mask = [0] * len(token_ids_0) + eos_flag
        if token_ids_1 is not None:
            mask = mask + [0] * len(token_ids_1) + eos_flag
        return mask
    def create_token_type_ids_from_sequences(
        self, token_ids_0: List[int], token_ids_1: Optional[List[int]] = None
    ) -> List[int]:
        """
        Create token type IDs for a sequence or a sequence pair. Positions of the first
        sequence are 0 and positions of the second sequence are 1:
        ```
        0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1
        | first sequence    | second sequence |
        ```
        When `add_eos_token` is enabled, each segment is one position longer to cover its EOS.
        If `token_ids_1` is None, only the first portion (0s) is returned.
        Args:
            token_ids_0 (`List[int]`):
                List of ids.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
        Returns:
            `List[int]`: List of [token type IDs](../glossary#token-type-ids) according to the given sequence(s).
        """
        eos_suffix = [self.eos_token_id] if self.add_eos_token else []
        first_segment = [0] * len(token_ids_0 + eos_suffix)
        if token_ids_1 is None:
            return first_segment
        return first_segment + [1] * len(token_ids_1 + eos_suffix)
 class Qwen2TokenizerFast(OriginalQwen2TokenizerFast):
    """
    Construct a "fast" Qwen2 tokenizer (backed by HuggingFace's *tokenizers* library). Based on byte-level
    Byte-Pair-Encoding. Optionally appends the `eos_token` to every encoded sequence.
    Same with GPT2Tokenizer, this tokenizer has been trained to treat spaces like parts of the tokens so a word will
    be encoded differently whether it is at the beginning of the sentence (without space) or not:
    ```python
    >>> from transformers import Qwen2TokenizerFast
    >>> tokenizer = Qwen2TokenizerFast.from_pretrained("Qwen/Qwen-tokenizer")
    >>> tokenizer("Hello world")["input_ids"]
    [9707, 1879]
    >>> tokenizer(" Hello world")["input_ids"]
    [21927, 1879]
    ```
    This is expected.
    This tokenizer inherits from [`PreTrainedTokenizerFast`] which contains most of the main methods. Users should
    refer to this superclass for more information regarding those methods.
    Args:
        vocab_file (`str`, *optional*):
            Path to the vocabulary file.
        merges_file (`str`, *optional*):
            Path to the merges file.
        tokenizer_file (`str`, *optional*):
            Path to [tokenizers](https://github.com/huggingface/tokenizers) file (generally has a .json extension) that
            contains everything needed to load the tokenizer.
        unk_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            The unknown token. A token that is not in the vocabulary cannot be converted to an ID and is set to be this
            token instead. Not applicable to this tokenizer.
        bos_token (`str`, *optional*):
            The beginning of sequence token. Not applicable for this tokenizer.
        eos_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            The end of sequence token.
        pad_token (`str`, *optional*, defaults to `"<|endoftext|>"`):
            The token used for padding, for example when batching sequences of different lengths.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether or not to add an `eos_token` at the end of sequences. Can also be changed after
            construction through the `add_eos_token` property, which rebuilds the post processor.
    """
    slow_tokenizer_class = Qwen2Tokenizer
    padding_side = "left"
    def __init__(
        self,
        vocab_file=None,
        merges_file=None,
        tokenizer_file=None,
        unk_token="<|endoftext|>",
        bos_token=None,
        eos_token="<|endoftext|>",
        pad_token="<|endoftext|>",
        add_eos_token=False,
        **kwargs,
    ):
        super().__init__(
            vocab_file=vocab_file,
            merges_file=merges_file,
            tokenizer_file=tokenizer_file,
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            pad_token=pad_token,
            **kwargs,
        )
        # Assign the backing field directly (not via the property setter) and rebuild the
        # post processor exactly once, now that the backend tokenizer exists.
        self._add_eos_token = add_eos_token
        self.update_post_processor()
    def update_post_processor(self):
        """
        Updates the underlying post processor with the current `eos_token`.
        Raises:
            ValueError: If `add_eos_token` is enabled but no `eos_token` is defined.
        """
        eos = self.eos_token
        eos_token_id = self.eos_token_id
        if eos is None and self.add_eos_token:
            raise ValueError("add_eos_token = True but eos_token = None")
        # Template syntax: `$A`/`$B` are the first/second sequence and `:N` is the type id.
        # When enabled, the EOS token is appended after each sequence with that sequence's type id.
        single = f"$A:0{(' '+eos+':0') if self.add_eos_token else ''}"
        pair = f"{single} $B:1{(' '+eos+':1') if self.add_eos_token else ''}"
        # Only tokens that actually appear in the templates are registered as special tokens.
        special_tokens = []
        if self.add_eos_token:
            special_tokens.append((eos, eos_token_id))
        self._tokenizer.post_processor = processors.TemplateProcessing(
            single=single, pair=pair, special_tokens=special_tokens
        )
    @property
    def add_eos_token(self):
        """Whether an `eos_token` is appended at the end of each encoded sequence."""
        return self._add_eos_token
    @add_eos_token.setter
    def add_eos_token(self, value):
        # Keep the backend post processor in sync with the flag; without this setter the
        # property was read-only and assigning to it raised AttributeError.
        self._add_eos_token = value
        self.update_post_processor()
--- a/tokenizer.json
+++ b/tokenizer.json
--- a/tokenizer_config.json
+++ b/tokenizer_config.json
@@ -0,0 +1,50 @@
 {
  "add_eos_token": true,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "151643": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151644": {
      "content": "<|im_start|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "151645": {
      "content": "<|im_end|>",
      "lstrip": false,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "additional_special_tokens": [
    "<|im_start|>",
    "<|im_end|>"
  ],
  "auto_map": {
    "AutoTokenizer": [
      "tokenization_qwen.Qwen2Tokenizer",
      "tokenization_qwen.Qwen2TokenizerFast"
    ]
  },
  "bos_token": null,
  "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
  "clean_up_tokenization_spaces": false,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "model_max_length": 32768,
  "pad_token": "<|endoftext|>",
  "split_special_tokens": false,
  "tokenizer_class": "Qwen2Tokenizer",
  "unk_token": null
 }
--- a/vocab.json
+++ b/vocab.json