commit 9a16ab7266145aa93e1c8a413cadf3595ebaf66c
Author: ModelHub XC
Date:   Wed Apr 29 23:00:52 2026 +0800

    Initialize project; model provided by the ModelHub XC community
    Model: RedHatAI/granite-3.1-2b-base-quantized.w8a8
    Source: Original Platform

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..a6344aa
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,35 @@
*.7z filter=lfs diff=lfs merge=lfs -text
*.arrow filter=lfs diff=lfs merge=lfs -text
*.bin filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.ckpt filter=lfs diff=lfs merge=lfs -text
*.ftz filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.h5 filter=lfs diff=lfs merge=lfs -text
*.joblib filter=lfs diff=lfs merge=lfs -text
*.lfs.* filter=lfs diff=lfs merge=lfs -text
*.mlmodel filter=lfs diff=lfs merge=lfs -text
*.model filter=lfs diff=lfs merge=lfs -text
*.msgpack filter=lfs diff=lfs merge=lfs -text
*.npy filter=lfs diff=lfs merge=lfs -text
*.npz filter=lfs diff=lfs merge=lfs -text
*.onnx filter=lfs diff=lfs merge=lfs -text
*.ot filter=lfs diff=lfs merge=lfs -text
*.parquet filter=lfs diff=lfs merge=lfs -text
*.pb filter=lfs diff=lfs merge=lfs -text
*.pickle filter=lfs diff=lfs merge=lfs -text
*.pkl filter=lfs diff=lfs merge=lfs -text
*.pt filter=lfs diff=lfs merge=lfs -text
*.pth filter=lfs diff=lfs merge=lfs -text
*.rar filter=lfs diff=lfs merge=lfs -text
*.safetensors filter=lfs diff=lfs merge=lfs -text
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
*.tar.* filter=lfs diff=lfs merge=lfs -text
*.tar filter=lfs diff=lfs merge=lfs -text
*.tflite filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.wasm filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text
*tfevents* filter=lfs diff=lfs merge=lfs -text
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..dbd6798
--- /dev/null
+++ b/README.md
@@ -0,0 +1,463 @@
---
tags:
- w8a8
- int8
- vllm
license: apache-2.0
license_link: https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md
language:
  - en
base_model: ibm-granite/granite-3.1-2b-base
library_name: transformers
---

# granite-3.1-2b-base-quantized.w8a8

## Model Overview
- **Model Architecture:** granite-3.1-2b-base
  - **Input:** Text
  - **Output:** Text
- **Model Optimizations:**
  - **Weight quantization:** INT8
  - **Activation quantization:** INT8
- **Release Date:** 1/8/2025
- **Version:** 1.0
- **Model Developers:** Neural Magic

Quantized version of [ibm-granite/granite-3.1-2b-base](https://huggingface.co/ibm-granite/granite-3.1-2b-base).
It achieves an average score of 57.22 on the [OpenLLM](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard) benchmark (version 1), whereas the unquantized model achieves 57.65.

### Model Optimizations

This model was obtained by quantizing the weights and activations of [ibm-granite/granite-3.1-2b-base](https://huggingface.co/ibm-granite/granite-3.1-2b-base) to the INT8 data type, making it ready for inference with vLLM >= 0.5.2.
This optimization reduces the number of bits per parameter from 16 to 8, cutting disk size and GPU memory requirements by approximately 50%. Only the weights and activations of the linear operators within transformer blocks are quantized.
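As a rough check of that 50% figure, the arithmetic works out as follows. This is a back-of-the-envelope sketch: the ~2.5B parameter count is an assumption for illustration, and because the embeddings and lm_head stay at 16-bit (only linear operators inside transformer blocks are quantized), the actual checkpoint (`model.safetensors`, about 2.84 GB) is slightly larger than the naive estimate.

```python
# Back-of-the-envelope estimate of the weight-memory savings from INT8.
params = 2.5e9                    # ~2.5B parameters (assumed for illustration)

bf16_bytes = params * 2           # 16 bits = 2 bytes per parameter
int8_bytes = params * 1           # 8 bits = 1 byte per parameter

print(f"BF16 weights: ~{bf16_bytes / 1e9:.1f} GB")  # ~5.0 GB
print(f"INT8 weights: ~{int8_bytes / 1e9:.1f} GB")  # ~2.5 GB, roughly 50% smaller
```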
## Deployment

### Use with vLLM

This model can be deployed efficiently using the [vLLM](https://docs.vllm.ai/en/latest/) backend, as shown in the example below.

```python
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams

max_model_len, tp_size = 4096, 1
model_name = "neuralmagic/granite-3.1-2b-base-quantized.w8a8"
tokenizer = AutoTokenizer.from_pretrained(model_name)
llm = LLM(model=model_name, tensor_parallel_size=tp_size, max_model_len=max_model_len, trust_remote_code=True)
sampling_params = SamplingParams(temperature=0.3, max_tokens=256, stop_token_ids=[tokenizer.eos_token_id])

messages_list = [
    [{"role": "user", "content": "Who are you? Please respond in pirate speak!"}],
]

prompt_token_ids = [tokenizer.apply_chat_template(messages, add_generation_prompt=True) for messages in messages_list]

outputs = llm.generate(prompt_token_ids=prompt_token_ids, sampling_params=sampling_params)

generated_text = [output.outputs[0].text for output in outputs]
print(generated_text)
```

vLLM also supports OpenAI-compatible serving. See the [documentation](https://docs.vllm.ai/en/latest/) for more details.
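For example, after launching a server with `vllm serve neuralmagic/granite-3.1-2b-base-quantized.w8a8`, requests can be sent with the OpenAI client. The snippet below is a minimal sketch: the port is vLLM's default, the prompt and sampling values are illustrative, and the completions API is used because this is a base (non-instruct) model.

```python
from openai import OpenAI

# Assumes a server started with:
#   vllm serve neuralmagic/granite-3.1-2b-base-quantized.w8a8
# vLLM's OpenAI-compatible server listens on port 8000 by default.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")

response = client.completions.create(
    model="neuralmagic/granite-3.1-2b-base-quantized.w8a8",
    prompt="INT8 quantization reduces GPU memory usage because",
    max_tokens=64,
    temperature=0.3,
)
print(response.choices[0].text)
```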
## Creation

This model was created with [llm-compressor](https://github.com/vllm-project/llm-compressor) by running the code snippet below.

<details>
  <summary>Model Creation Code</summary>

```bash
python quantize.py --model_path ibm-granite/granite-3.1-2b-base --quant_path "output_dir/granite-3.1-2b-base-quantized.w8a8" --calib_size 1024 --dampening_frac 0.01 --observer mse
```

```python
import argparse

from datasets import load_dataset
from transformers import AutoTokenizer
from llmcompressor.modifiers.quantization import GPTQModifier
from llmcompressor.modifiers.smoothquant import SmoothQuantModifier
from llmcompressor.transformers import SparseAutoModelForCausalLM, oneshot

parser = argparse.ArgumentParser()
parser.add_argument('--model_path', type=str)
parser.add_argument('--quant_path', type=str)
parser.add_argument('--calib_size', type=int, default=256)
parser.add_argument('--dampening_frac', type=float, default=0.1)
parser.add_argument('--observer', type=str, default="minmax")
args = parser.parse_args()

model = SparseAutoModelForCausalLM.from_pretrained(
    args.model_path,
    device_map="auto",
    torch_dtype="auto",
    use_cache=False,
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(args.model_path)

# Calibration data for SmoothQuant and GPTQ.
NUM_CALIBRATION_SAMPLES = args.calib_size
DATASET_ID = "neuralmagic/LLM_compression_calibration"
DATASET_SPLIT = "train"
ds = load_dataset(DATASET_ID, split=DATASET_SPLIT)
ds = ds.shuffle(seed=42).select(range(NUM_CALIBRATION_SAMPLES))

def preprocess(example):
    return {"text": example["text"]}

ds = ds.map(preprocess)

def tokenize(sample):
    return tokenizer(
        sample["text"],
        padding=False,
        truncation=False,
        add_special_tokens=True,
    )

ds = ds.map(tokenize, remove_columns=ds.column_names)

# SmoothQuant mappings: migrate activation outliers from the listed
# projections into the preceding normalization / projection weights.
ignore = ["lm_head"]
mappings = [
    [["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"], "re:.*input_layernorm"],
    [["re:.*gate_proj", "re:.*up_proj"], "re:.*post_attention_layernorm"],
    [["re:.*down_proj"], "re:.*up_proj"],
]

recipe = [
    SmoothQuantModifier(smoothing_strength=0.7, ignore=ignore, mappings=mappings),
    GPTQModifier(
        targets=["Linear"],
        ignore=["lm_head"],
        scheme="W8A8",
        dampening_frac=args.dampening_frac,
        observer=args.observer,
    ),
]

oneshot(
    model=model,
    dataset=ds,
    recipe=recipe,
    num_calibration_samples=args.calib_size,
    max_seq_length=8196,
)

# Save the compressed model and tokenizer to disk.
model.save_pretrained(args.quant_path, save_compressed=True)
tokenizer.save_pretrained(args.quant_path)
```
</details>
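After the run completes, a quick way to confirm that the checkpoint was saved in compressed form is to inspect its quantization config. This is a sketch, assuming the `--quant_path` used above; the expected keys mirror the shipped `recipe.yaml`, and whether the config is exposed as a plain dict may vary by transformers version.

```python
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained("output_dir/granite-3.1-2b-base-quantized.w8a8")
qc = cfg.quantization_config  # written by save_pretrained(..., save_compressed=True)

# Expect compressed-tensors metadata with 8-bit weights and activations.
group = qc["config_groups"]["group_0"]
print(qc.get("quant_method"), group["weights"]["num_bits"], group["input_activations"]["num_bits"])
```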
## Evaluation

The model was evaluated on OpenLLM Leaderboard [V1](https://huggingface.co/spaces/open-llm-leaderboard-old/open_llm_leaderboard), OpenLLM Leaderboard [V2](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/) and on [HumanEval](https://github.com/neuralmagic/evalplus), using the following commands:

<details>
  <summary>Evaluation Commands</summary>

OpenLLM Leaderboard V1:
```
lm_eval \
  --model vllm \
  --model_args pretrained="neuralmagic/granite-3.1-2b-base-quantized.w8a8",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1,gpu_memory_utilization=0.8,enable_chunked_prefill=True,trust_remote_code=True \
  --tasks openllm \
  --write_out \
  --batch_size auto \
  --output_path output_dir \
  --show_config
```

#### HumanEval
##### Generation
```
python3 codegen/generate.py \
  --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 \
  --bs 16 \
  --temperature 0.2 \
  --n_samples 50 \
  --root "." \
  --dataset humaneval
```
##### Sanitization
```
python3 evalplus/sanitize.py \
  humaneval/neuralmagic--granite-3.1-2b-base-quantized.w8a8_vllm_temp_0.2
```
##### Evaluation
```
evalplus.evaluate \
  --dataset humaneval \
  --samples humaneval/neuralmagic--granite-3.1-2b-base-quantized.w8a8_vllm_temp_0.2-sanitized
```
</details>
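The same OpenLLM V1 run can also be driven from Python. This is a sketch assuming lm-evaluation-harness is installed with vLLM support; the arguments mirror the CLI call above.

```python
import lm_eval

# Mirrors the lm_eval CLI invocation above; the "openllm" task group
# bundles the six Leaderboard V1 tasks.
results = lm_eval.simple_evaluate(
    model="vllm",
    model_args=(
        "pretrained=neuralmagic/granite-3.1-2b-base-quantized.w8a8,"
        "dtype=auto,add_bos_token=True,max_model_len=4096,"
        "tensor_parallel_size=1,gpu_memory_utilization=0.8"
    ),
    tasks=["openllm"],
    batch_size="auto",
)
print(results["results"])
```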
### Accuracy
| Category | Metric | ibm-granite/granite-3.1-2b-base | neuralmagic/granite-3.1-2b-base-quantized.w8a8 | Recovery (%) |
| --- | --- | --- | --- | --- |
| OpenLLM V1 | ARC-Challenge (Acc-Norm, 25-shot) | 53.75 | 54.01 | 100.48 |
| OpenLLM V1 | GSM8K (Strict-Match, 5-shot) | 47.84 | 46.55 | 97.30 |
| OpenLLM V1 | HellaSwag (Acc-Norm, 10-shot) | 77.94 | 77.94 | 100.00 |
| OpenLLM V1 | MMLU (Acc, 5-shot) | 52.88 | 52.34 | 98.98 |
| OpenLLM V1 | TruthfulQA (MC2, 0-shot) | 39.04 | 38.12 | 97.64 |
| OpenLLM V1 | Winogrande (Acc, 5-shot) | 74.43 | 74.35 | 99.89 |
| OpenLLM V1 | **Average Score** | **57.65** | **57.22** | **99.26** |
| Coding | HumanEval Pass@1 | 30.00 | 29.60 | 98.67 |
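The recovery column is simply the quantized score expressed as a percentage of the unquantized baseline. A small sketch reproducing the arithmetic from the table above:

```python
# Reproduce the Recovery (%) column from the accuracy table above.
scores = {
    "ARC-Challenge": (53.75, 54.01),
    "GSM8K": (47.84, 46.55),
    "HellaSwag": (77.94, 77.94),
    "MMLU": (52.88, 52.34),
    "TruthfulQA": (39.04, 38.12),
    "Winogrande": (74.43, 74.35),
}
for task, (base, quant) in scores.items():
    print(f"{task}: {100 * quant / base:.2f}% recovery")
```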
## Inference Performance

This model achieves up to 1.4x speedup in single-stream deployment and up to 1.1x speedup in multi-stream asynchronous deployment, depending on hardware and use-case scenario.
The following performance benchmarks were conducted with [vLLM](https://docs.vllm.ai/en/latest/) version 0.6.6.post1 and [GuideLLM](https://github.com/neuralmagic/guidellm).

<details>
  <summary>Benchmarking Command</summary>

```
guidellm --model neuralmagic/granite-3.1-2b-base-quantized.w8a8 --target "http://localhost:8000/v1" --data-type emulated --data "prompt_tokens=<prompt_tokens>,generated_tokens=<generated_tokens>" --max-seconds 360 --backend aiohttp_server
```
</details>

### Single-stream performance (measured with vLLM version 0.6.6.post1)
**Latency (s)**

| GPU class | Model | Speedup | Code Completion<br>prefill: 256 tokens<br>decode: 1024 tokens | Docstring Generation<br>prefill: 768 tokens<br>decode: 128 tokens | Code Fixing<br>prefill: 1024 tokens<br>decode: 1024 tokens | RAG<br>prefill: 1024 tokens<br>decode: 128 tokens | Instruction Following<br>prefill: 256 tokens<br>decode: 128 tokens | Multi-turn Chat<br>prefill: 512 tokens<br>decode: 256 tokens | Large Summarization<br>prefill: 4096 tokens<br>decode: 512 tokens |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| A5000 | granite-3.1-2b-base | | 10.9 | 1.4 | 11.0 | 1.5 | 1.4 | 2.8 | 6.1 |
| A5000 | granite-3.1-2b-base-quantized.w8a8<br>(this model) | 1.37 | 7.9 | 1.0 | 8.0 | 1.1 | 1.0 | 2.0 | 4.7 |
| A5000 | granite-3.1-2b-base-quantized.w4a16 | 1.94 | 5.4 | 0.7 | 5.5 | 0.8 | 0.7 | 1.4 | 3.4 |
| A6000 | granite-3.1-2b-base | | 9.8 | 1.3 | 10.0 | 1.3 | 1.3 | 2.6 | 5.4 |
| A6000 | granite-3.1-2b-base-quantized.w8a8<br>(this model) | 1.31 | 7.8 | 1.0 | 7.6 | 1.0 | 0.9 | 1.9 | 4.5 |
| A6000 | granite-3.1-2b-base-quantized.w4a16 | 1.87 | 5.1 | 0.7 | 5.2 | 0.7 | 0.7 | 1.3 | 3.1 |
### Multi-stream asynchronous performance (measured with vLLM version 0.6.6.post1)
**Maximum Throughput (Queries per Second)**

| GPU class | Model | Speedup | Code Completion<br>prefill: 256 tokens<br>decode: 1024 tokens | Docstring Generation<br>prefill: 768 tokens<br>decode: 128 tokens | Code Fixing<br>prefill: 1024 tokens<br>decode: 1024 tokens | RAG<br>prefill: 1024 tokens<br>decode: 128 tokens | Instruction Following<br>prefill: 256 tokens<br>decode: 128 tokens | Multi-turn Chat<br>prefill: 512 tokens<br>decode: 256 tokens | Large Summarization<br>prefill: 4096 tokens<br>decode: 512 tokens |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| A5000 | granite-3.1-2b-base | | 2.9 | 10.2 | 1.8 | 8.2 | 19.3 | 9.1 | 1.3 |
| A5000 | granite-3.1-2b-base-quantized.w8a8<br>(this model) | 1.13 | 3.1 | 12.1 | 2.0 | 9.6 | 22.2 | 10.2 | 1.4 |
| A5000 | granite-3.1-2b-base-quantized.w4a16 | 0.98 | 2.8 | 10.0 | 1.8 | 8.1 | 18.6 | 9.0 | 1.2 |
| A6000 | granite-3.1-2b-base | | 3.7 | 12.4 | 2.4 | 10.3 | 23.6 | 11.0 | 1.6 |
| A6000 | granite-3.1-2b-base-quantized.w8a8<br>(this model) | 1.12 | 3.6 | 14.4 | 2.7 | 12.0 | 28.3 | 12.9 | 1.7 |
| A6000 | granite-3.1-2b-base-quantized.w4a16 | 0.95 | 3.7 | 11.4 | 2.5 | 9.8 | 22.1 | 10.4 | 1.4 |
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..9e47553
--- /dev/null
+++ b/config.json
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:64de972a12d170123a7aa4281f051a1b9ae16a45bc2380d605fc334e4254089f
size 1906
diff --git a/configuration.json b/configuration.json
new file mode 100644
index 0000000..bbeeda1
--- /dev/null
+++ b/configuration.json
@@ -0,0 +1 @@
{"framework": "pytorch", "task": "text-generation", "allow_remote": true}
\ No newline at end of file
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..614b75d
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,7 @@
{
  "_from_model_config": true,
  "bos_token_id": 0,
  "eos_token_id": 0,
  "pad_token_id": 0,
  "transformers_version": "4.47.1"
}
diff --git a/merges.txt b/merges.txt
new file mode 100644
index 0000000..f9f6899
--- /dev/null
+++ b/merges.txt
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:303127a244b0078878156c17229f36d11b7a3a3f8e47b7cfdbb304ff46be5030
size 441810
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000..c26caaa
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:f56a1e0d6926e6042eef5f15b88daba17e5c547af4080db1e563561f0f2d4f6f
size 2837639392
diff --git a/recipe.yaml b/recipe.yaml
new file mode 100644
index 0000000..2c73d48
--- /dev/null
+++ b/recipe.yaml
@@ -0,0 +1,21 @@
quant_stage:
  quant_modifiers:
    SmoothQuantModifier:
      smoothing_strength: 0.7
      mappings:
      - - ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
        - re:.*input_layernorm
      - - ['re:.*gate_proj', 're:.*up_proj']
        - re:.*post_attention_layernorm
      - - ['re:.*down_proj']
        - re:.*up_proj
    GPTQModifier:
      sequential_update: true
      dampening_frac: 0.01
      ignore: [lm_head]
      config_groups:
        group_0:
          targets: [Linear]
          weights: {num_bits: 8, type: int, symmetric: true, strategy: channel, observer: mse}
          input_activations: {num_bits: 8, type: int, symmetric: true, strategy: token, dynamic: true,
            observer: memoryless}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000..7dec8d5
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,51 @@
{
  "additional_special_tokens": [
    "<|endoftext|>",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    "",
    ""
  ],
  "bos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<|endoftext|>",
    "lstrip": false,
    "normalized": false,
    "rstrip": false,
    "single_word": false
  }
}
diff --git a/tokenizer.json b/tokenizer.json
new file mode 100644
index 0000000..0f72a12
--- /dev/null
+++ b/tokenizer.json
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2b05515b3c0fced3086c56484444669b1a99a86a8e843dba0bd7c48ac609e9d6
size 3475389
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000..2047874
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:a65886cabc33b82795df74cbcf99e88f4be3e93a72808eb7c006a8ae3d66afb3
size 4156
diff --git a/vocab.json b/vocab.json
new file mode 100644
index 0000000..fdc0b09
--- /dev/null
+++ b/vocab.json
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:20175afb9f164fad4829aca2279f8df7eeff1e2e3f671378aaa287a740aff09f
size 776993