From 1dd00ffc853b932ddf9b26074a2ea0aa168de718 Mon Sep 17 00:00:00 2001
From: ModelHub XC
Date: Sat, 2 May 2026 02:29:55 +0800
Subject: [PATCH] Initialize project; model provided by the ModelHub XC community
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Model: zl2272001/selfrag_llama2_7b
Source: Original Platform
---
 .gitattributes                   | 35 ++++++++++++++++
 README.md                        | 71 ++++++++++++++++++++++++++++++++
 added_tokens.json                | 18 ++++++++
 config.json                      | 26 ++++++++++++
 configuration.json               |  1 +
 generation_config.json           | 10 +++++
 pytorch_model-00001-of-00002.bin |  3 ++
 pytorch_model-00002-of-00002.bin |  3 ++
 pytorch_model.bin.index.json     |  3 ++
 special_tokens_map.json          | 23 +++++++++++
 tokenizer.model                  |  3 ++
 tokenizer_config.json            | 37 +++++++++++++++++
 12 files changed, 233 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 README.md
 create mode 100644 added_tokens.json
 create mode 100644 config.json
 create mode 100644 configuration.json
 create mode 100644 generation_config.json
 create mode 100644 pytorch_model-00001-of-00002.bin
 create mode 100644 pytorch_model-00002-of-00002.bin
 create mode 100644 pytorch_model.bin.index.json
 create mode 100644 special_tokens_map.json
 create mode 100644 tokenizer.model
 create mode 100644 tokenizer_config.json

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..a6344aa
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,35 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c593d43
--- /dev/null
+++ b/README.md
@@ -0,0 +1,71 @@
+---
+license: mit
+---
+This model is a 7B [Self-RAG](https://selfrag.github.io/) model that generates outputs to diverse user queries as well as *reflection tokens* to call the retrieval system adaptively and criticize its own output and retrieved passages.
+
+Self-RAG is trained on our instruction-following corpora with interleaved passages and reflection tokens, using the standard next-token prediction objective, which enables efficient and stable learning with fine-grained feedback.
+At inference, we leverage reflection tokens covering diverse aspects of generations to sample the best output aligned with users' preferences.
+See full descriptions in [our paper](https://arxiv.org/abs/2310.11511).
+
+## Usage
+Here, we show an easy way to quickly download our model from HuggingFace and run it with `vllm` on pre-given passages. Make sure to install the dependencies listed at [self-rag/requirements.txt](https://github.com/AkariAsai/self-rag/blob/main/requirements.txt).
+To run our full inference pipeline with a retrieval system and fine-grained tree decoding, please use [our code](https://github.com/AkariAsai/self-rag).
+
+```py
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from vllm import LLM, SamplingParams
+
+model = LLM("selfrag/selfrag_llama2_7b", download_dir="/gscratch/h2lab/akari/model_cache", dtype="half")
+sampling_params = SamplingParams(temperature=0.0, top_p=1.0, max_tokens=100, skip_special_tokens=False)
+
+def format_prompt(input, paragraph=None):
+    prompt = "### Instruction:\n{0}\n\n### Response:\n".format(input)
+    if paragraph is not None:
+        prompt += "[Retrieval]<paragraph>{0}</paragraph>".format(paragraph)
+    return prompt
+
+query_1 = "Leave odd one out: twitter, instagram, whatsapp."
+query_2 = "Can you tell me the difference between llamas and alpacas?"
+queries = [query_1, query_2]
+
+preds = model.generate([format_prompt(query) for query in queries], sampling_params)
+for pred in preds:
+    print("Model prediction: {0}".format(pred.outputs[0].text))
+# Model prediction: Twitter, Instagram, and WhatsApp are all social media platforms.[No Retrieval]WhatsApp is the odd one out because it is a messaging app, while Twitter and
+# Instagram are primarily used for sharing photos and videos.[Utility:5] (this query doesn't require factual grounding; the model skips retrieval and does normal instruction-following generation)
+# Model prediction: Sure![Retrieval] ... (this query requires factual grounding, so the model calls a retriever)
+
+# generate with a retrieved passage
+prompt = format_prompt("Can you tell me the difference between llamas and alpacas?", paragraph="The alpaca (Lama pacos) is a species of South American camelid mammal. It is similar to, and often confused with, the llama. Alpacas are considerably smaller than llamas, and unlike llamas, they were not bred to be working animals, but were bred specifically for their fiber.")
+preds = model.generate([prompt], sampling_params)
+print([pred.outputs[0].text for pred in preds])
+# ['[Relevant]Alpacas are considerably smaller than llamas, and unlike llamas, they were not bred to be working animals, but were bred specifically for their fiber.[Fully supported][Utility:5]']
+```
+
+## Input Format
+As described in the `format_prompt` function, your input should be formatted as
+```
+### Instruction:\n{instruction}\n\n### Response:\n
+```
+or, if you have an additional input,
+```
+### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n
+```
+You can insert retrieved paragraphs anywhere after `### Response:\n`, but make sure to mark them with the paragraph tokens (i.e., `<paragraph>{0}</paragraph>`).
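+
+As an illustrative sketch (not part of the official Self-RAG codebase; the helper name `build_selfrag_prompt` is made up here), a prompt builder covering both variants above plus the paragraph marking might look like this:
+```py
+def build_selfrag_prompt(instruction, input=None, paragraph=None):
+    # Base instruction block, with the optional "### Input" section.
+    if input is not None:
+        prompt = "### Instruction:\n{0}\n\n### Input:\n{1}\n\n### Response:\n".format(instruction, input)
+    else:
+        prompt = "### Instruction:\n{0}\n\n### Response:\n".format(instruction)
+    # Retrieved evidence goes after "### Response:\n", wrapped in paragraph tokens.
+    if paragraph is not None:
+        prompt += "[Retrieval]<paragraph>{0}</paragraph>".format(paragraph)
+    return prompt
+
+print(build_selfrag_prompt("Summarize the passage.", paragraph="Alpacas were bred for their fiber."))
+```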
+
+## Training details
+Our training data is available at the HuggingFace dataset [selfrag_train_data](https://huggingface.co/datasets/selfrag/selfrag_train_data).
+See our official repository for the training details.
+We used 8 A100 40GB GPUs for training on the Stability HPC server.
+
+## Citation and contact
+If you use this model, please cite our work:
+```
+@article{asai2023selfrag,
+  author  = {Asai, Akari and Wu, Zeqiu and Wang, Yizhong and Sil, Avirup and Hajishirzi, Hannaneh},
+  title   = {{Self-RAG}: Learning to Retrieve, Generate, and Critique through Self-Reflection},
+  year    = {2023},
+  journal = {arXiv preprint arXiv:2310.11511},
+  URL     = {https://arxiv.org/abs/2310.11511}
+}
+```
\ No newline at end of file
diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000..9812cb8
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,18 @@
+{
+  "</paragraph>": 32006,
+  "<pad>": 32015,
+  "<paragraph>": 32005,
+  "[Continue to Use Evidence]": 32002,
+  "[Fully supported]": 32012,
+  "[Irrelevant]": 32003,
+  "[No Retrieval]": 32000,
+  "[No support / Contradictory]": 32014,
+  "[Partially supported]": 32013,
+  "[Relevant]": 32004,
+  "[Retrieval]": 32001,
+  "[Utility:1]": 32007,
+  "[Utility:2]": 32008,
+  "[Utility:3]": 32009,
+  "[Utility:4]": 32010,
+  "[Utility:5]": 32011
+}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..9bd5436
--- /dev/null
+++ b/config.json
@@ -0,0 +1,26 @@
+{
+  "_name_or_path": "meta-llama/Llama-2-7b-hf",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 11008,
+  "max_position_embeddings": 4096,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.34.0.dev0",
+  "use_cache": true,
+  "vocab_size": 32016
+}
diff --git a/configuration.json b/configuration.json
new file mode 100644
index 0000000..f9291c3
--- /dev/null
+++ b/configuration.json
@@ -0,0 +1 @@
+{"framework":"Pytorch","task":"text-generation"}
\ No newline at end of file
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..f0e05f0
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,10 @@
+{
+  "bos_token_id": 1,
+  "do_sample": true,
+  "eos_token_id": 2,
+  "max_length": 4096,
+  "pad_token_id": 0,
+  "temperature": 0.6,
+  "top_p": 0.9,
+  "transformers_version": "4.34.0.dev0"
+}
diff --git a/pytorch_model-00001-of-00002.bin b/pytorch_model-00001-of-00002.bin
new file mode 100644
index 0000000..d2fb8d5
--- /dev/null
+++ b/pytorch_model-00001-of-00002.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5c726ea09de32bff5ad0053f98bfd68847e18830d54b4c3d08b67c270380bbd1
+size 9976751194
diff --git a/pytorch_model-00002-of-00002.bin b/pytorch_model-00002-of-00002.bin
new file mode 100644
index 0000000..adbc9cb
--- /dev/null
+++ b/pytorch_model-00002-of-00002.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0883291fa5ac8b05ff5afb22cf08921be94c34988424a5e9ae3931663ad8ed93
+size 3500441859
diff --git a/pytorch_model.bin.index.json b/pytorch_model.bin.index.json
new file mode 100644
index 0000000..c026802
--- /dev/null
+++ b/pytorch_model.bin.index.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ca9b8f5a8f500eafd4bdcbd52898a75834f2f5d596630cdca4da3c30034b625
+size 23950
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000..7633d0a
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+  "additional_special_tokens": [
+    "[No Retrieval]",
+    "[Retrieval]",
+    "[Continue to Use Evidence]",
+    "[Irrelevant]",
+    "[Relevant]",
+    "<paragraph>",
+    "</paragraph>",
+    "[Utility:1]",
+    "[Utility:2]",
+    "[Utility:3]",
+    "[Utility:4]",
+    "[Utility:5]",
+    "[Fully supported]",
+    "[Partially supported]",
+    "[No support / Contradictory]"
+  ],
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000..6c00c74
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..16ada5c
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,37 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "clean_up_tokenization_spaces": false,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": null,
+  "padding_side": "right",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "use_default_system_prompt": true
+}
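
For readers wiring this checkpoint into their own code, a quick sanity check can confirm that the tokenizer files added above line up with `config.json` and `added_tokens.json`. The sketch below is illustrative and not part of the upstream repository; it assumes the `selfrag/selfrag_llama2_7b` model id used in the README and a working `transformers` install, and it only inspects tokenizer metadata rather than loading the ~13.5 GB of weight shards. Note also that `generation_config.json` defaults to sampling (`do_sample: true`, `temperature: 0.6`, `top_p: 0.9`), whereas the README example decodes greedily with `temperature=0.0` and keeps `skip_special_tokens=False` so the reflection tokens stay visible, so set decoding parameters explicitly rather than relying on the defaults.

```py
# Minimal sanity check (assumption: the "selfrag/selfrag_llama2_7b" repo id
# from the README, or a local checkout of the files in this patch).
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("selfrag/selfrag_llama2_7b")

# config.json declares vocab_size 32016: 32000 base Llama-2 tokens plus the
# 16 reflection/special tokens listed in added_tokens.json (ids 32000-32015).
assert len(tokenizer) == 32016

# Spot-check a few reflection tokens against the ids in added_tokens.json.
expected_ids = {
    "[No Retrieval]": 32000,
    "[Retrieval]": 32001,
    "[Relevant]": 32004,
    "[Utility:5]": 32011,
}
for token, token_id in expected_ids.items():
    assert tokenizer.convert_tokens_to_ids(token) == token_id

print("Reflection tokens are registered as expected.")
```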