From 6d11f50eb12480ed45c1954e69d6b38218f540a2 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Sun, 14 Jun 2026 05:08:12 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: LiquidAI/LFM2-350M-PII-Extract-JP Source: Original Platform --- .gitattributes | 50 + LICENSE | 71 + README.md | 294 +++ chat_template.jinja | 50 + config.json | 59 + configuration.json | 1 + generation_config.json | 7 + model.safetensors | 3 + special_tokens_map.json | 23 + tokenizer.json | 3 + tokenizer_config.json | 4078 +++++++++++++++++++++++++++++++++++++++ 11 files changed, 4639 insertions(+) create mode 100644 .gitattributes create mode 100644 LICENSE create mode 100644 README.md create mode 100644 chat_template.jinja create mode 100644 config.json create mode 100644 configuration.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..16332c1 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,50 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bin.* filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zstandard filter=lfs diff=lfs merge=lfs -text +*.tfevents* filter=lfs diff=lfs merge=lfs -text +*.db* filter=lfs diff=lfs merge=lfs -text +*.ark* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text +**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text + +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.gguf* filter=lfs diff=lfs merge=lfs -text +*.ggml filter=lfs diff=lfs merge=lfs -text +*.llamafile* filter=lfs diff=lfs merge=lfs -text +*.pt2 filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text + +tokenizer.json filter=lfs diff=lfs merge=lfs -text +model.safetensors filter=lfs diff=lfs merge=lfs -text \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..25e731c --- /dev/null +++ b/LICENSE @@ -0,0 +1,71 @@ +LFM Open License v1.0 + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + +"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by this document. + +"Licensor" shall mean Liquid AI, Inc. + +"Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. + +"You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. + +"Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. + +"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. + +"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work. + +"Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. + +"Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." + +"Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. + +"Commercial Use" shall mean any use of the Work for direct or indirect commercial advantage or monetary compensation. + +"Qualified Non-Profit Organization" shall mean a Legal Entity that is organized and operated exclusively for religious, charitable, scientific, testing for public safety, literary, or educational purposes, and which is exempt from federal income tax under Section 501(c)(3) of the United States Internal Revenue Code of 1986, as amended, or any equivalent non-profit or charitable organization in a foreign jurisdiction. + +"Non-Commercial or Research Purposes" shall mean purposes that do not involve any use of the Work or a Derivative Work for Commercial Use. + +"Threshold" shall mean annual revenue of 10 million United States dollars ($10,000,000) or more. + +2. Grant of Copyright License. Subject to the terms and conditions of this License, including the Commercial Use limitation set forth in Section 5, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of this License, including the Commercial Use limitation set forth in Section 5, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: + +(a) You must give any other recipients of the Work or Derivative Works a copy of this License; and + +(b) You must cause any modified files to carry prominent notices stating that You changed the files; and + +(c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and + +(d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. + +You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. + +5. Commercial Use Limitation. + +(a) The rights granted under this License for Commercial Use are conditioned upon You or Your Legal Entity not exceeding the Threshold. + +(b) Any Commercial Use of the Work or a Derivative Work by a Legal Entity that exceeds the Threshold is not licensed under this Agreement. + +(c) The Threshold shall not apply to a Qualified Non-Profit Organization's use of the Work or a Derivative Work for Non-Commercial or Research Purposes. + +6. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. + +7. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except for the reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. + +8. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. + +9. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. + +10. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. + +11. Termination. This License will terminate automatically and immediately if You fail to comply with any of its terms and conditions. Upon termination, You must cease all use of the Work and any Derivative Works and delete all copies in Your possession. + +END OF TERMS AND CONDITIONS \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..057fc81 --- /dev/null +++ b/README.md @@ -0,0 +1,294 @@ +--- +library_name: transformers +language: +- ja +base_model: +- LiquidAI/LFM2-350M +license: other +license_name: lfm1.0 +license_link: LICENSE +--- + +
+
+ Liquid AI +
+
+Try LFMDocsLEAPDiscord +
+
+ +
+ +([_日本語はこちらから_](#lfm2-350m-pii-extract-jp-日本語)) + +# LFM2-350M-PII-Extract-JP + +Based on [LFM2-350M](https://huggingface.co/LiquidAI/LFM2-350M), this checkpoint is designed to **extract personally identifiable information (PII) from Japanese text and output it in JSON format.** +The output can then be used to mask out sensitive information in contracts, emails, personal medical reports, insurance bills, etc. directly on-device. + +In particular, it is trained to extract: +* Address/locations (JSON key: `address`) +* Company/institute/organization names (JSON key: `company_name`) +* Email addresses (JSON key: `email_address`) +* Human names (JSON key: `human_name`) +* Phone numbers (JSON key: `phone_number`) +from Japanese documents and texts. + +### Demo + + +Running on a macbook pro. + +## Extraction Quality + +We evaluated several models, including GPT-5 and a 32B-parameter Qwen3 model with thinking mode enabled. +The image below shows the average recall score on 1,000 random samples, chunked into segments of 100–1,000 characters, taken from [finepdf](https://huggingface.co/datasets/HuggingFaceFW/finepdfs). +Overall, we found **LFM2-350M-PII-Extract-JP** to achieve GPT-5–level performance with only 350 million parameters—bringing cloud-grade performance to on-device applications! + +![Model Size vs Recall Score](https://cdn-uploads.huggingface.co/production/uploads/65d6b6c1a07ad79084a0d214/ToNbcFk-7Pyr-aReyErEm.png) + +### Sample Responses + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Input textOutput JSON
田中 太郎 様

平素より格別のご高配を賜り、誠にありがとうございます。

このたび、山田 花子 が ABCコーポレーション赤坂オフィス へ 田中 太郎 様をご招待いたしました。

ご来訪当日は、本メールに記載の「受付番号」または「受付QRコード」を使用してチェックインを行っていただきます。以下のいずれかの方法でご準備をお願いいたします。

・本メールを印刷してご持参いただく
・スマートフォンで本メールを表示いただく

■ご来訪情報
受付番号: 594813
日付: 2025-10-03
時間: 11:00
担当者: 山田 花子
場所: 東京都港区赤坂1-2-3 赤坂ビジネスタワー5階
{"address": ["東京都港区赤坂1-2-3 赤坂ビジネスタワー5階"], "company_name": ["ABCコーポレーション赤坂オフィス"], "email_address": [], "human_name": ["田中 太郎", "山田 花子"], "phone_number": []}
山口さん

CCに佐藤さんも追加させていただきました

打ち合わせ可能なお時間をご連絡いただきありがとうございます。
佐藤さんたちは御社との初回の打ち合わせのため、御社についての基本的なところからご紹介いただく場としていただき、
お忙しいところ恐縮ですが、我々の打ち合わせについては別枠として、10月8日(水)17:00~の佐藤さんたちとの打ち合わせの後に開催はいかがでしょうか。

終了時間が18時を過ぎてしまう可能性が高いため、ご都合が悪ければ別日で調整させてください。
お手数をおかけしますが、ご確認のほどどうぞよろしくお願いいたします。

中村 正信

Email: nakamura@japan.co.jp
{"address": [], "company_name": [], "email_address": ["nakamura@japan.co.jp"], "human_name": ["山口", "佐藤", "中村 正信"], "phone_number": []}
差出人: モンスタカード株式会社(自動配信)
件名: 【モスカードフリー】ご利用内容のお知らせ



夢野 みな 様

いつもモスカードフリーをご利用いただきありがとうございます。
お客様のカードご利用内容をお知らせいたします。



ご利用内容
• 利用日:2025年10月6日 18:38:15
• 利用先:ライトペイ
• ご利用金額:1000円
• 承認番号:294816
{"address": [], "company_name": ["モンスタカード株式会社", "ライトペイ"], "email_address": [], "human_name": ["夢野 みな"], "phone_number": []}
株式会社ネットワークソリューションズ

請求書

発行日:2024年10月27日
請求番号:NS-20241027-001

田中電気工業
東京都渋谷区神南1-23-10
電話番号:+81373453302

--------------------------------------------------------------------------------

**請求先**

伊藤潔文 様
ryuma@izumiya.cloud

--------------------------------------------------------------------------------
{"address": ["東京都渋谷区神南1-23-10"], "company_name": ["株式会社ネットワークソリューションズ", "田中電気工業"], "email_address": ["ryuma@izumiya.cloud"], "human_name": ["伊藤潔文"], "phone_number": ["+81373453302"]}
機材等借用書(一般化・架空化済)

作成日:2025年4月18日

貸主:ネオテック株式会社
住所:東京都中央区銀星町三丁目
担当者:山田 廉(yamada@neotech.example.jp)

借主:グローバルデバイスホールディングス株式会社
住所:東京都新都区青海一丁目
担当者:佐藤 翔(sato@globaldevices.example.com)



借用物品のリスト
- スマートフォン 型番:Nova X10 Pro 数量:1台 状態:良好
- モデル(Conversa 1B LFM, Conversa 3B LFM, Focus 1B LFM, Focus 1B VLM)



借用条件
- 借用期間:2025年4月18日から2025年6月30日まで
- 借用目的:オンデバイスモデルの動作検証
- 使用責任:物品は善良な管理者の注意をもって使用し、破損した場合は貸主に即時報告すること。
- 紛失・盗難時の対応:同等の物品を貸主に弁償すること。



貸主署名:中島 誠一
借主署名:高橋 健吾
{"address": ["東京都中央区銀星町三丁目", "東京都新都区青海一丁目"], "company_name": ["ネオテック株式会社", "グローバルデバイスホールディングス株式会社"], "email_address": ["yamada@neotech.example.jp"], "human_name": ["中島 誠一", "山田 廉", "佐藤 翔"], "phone_number": []}
+ +> [!NOTE] +> 📝 While LFM2-350M-PII-Extract-JP provides strong out-of-the-box PII entity extraction for the categories listed above, our primary goal is to deliver a versatile, community-driven base model—a foundation that makes it easy to build best-in-class, privacy-focused masking systems. +> +> Like any base model, there remain areas for continued development, particularly for specialized use cases: +> - Supporting extraction of organization-specific identification numbers +> - Expanding coverage to additional categories such as date of birth, passport numbers +> - Further improving extraction performance on particular categories +> +> These are precisely the kinds of challenges that fine-tuning—by both Liquid AI and our developer community can address. We see this model not just as an endpoint, but as a catalyst for a rich ecosystem of fine-tuned PII extraction models tailored to real-world needs. + +## Model Details + +**Generation parameters**: We strongly recommend using greedy decoding with a `temperature=0`. + +**System prompts**: This checkpoint **requires** the following system prompt: +``` +Extract
, , , , +``` + +Note the model can handle extraction of particular entities. E.g. The model will only output human names when the system prompt is set to `Extract `. + +> [!WARNING] +> ⚠️ For best performance, ensure alphabetical order of entity categories as shown above. + + +**Chat template**: LFM2-PII-Extract-JP uses a ChatML-like chat template as follows: + +``` +<|startoftext|><|im_start|>system +Extract
, , , , <|im_end|> +<|im_start|>user +こんにちは、ラミンさんに B200 GPU を 10000 台 至急請求してください。連絡先は celegans@liquid.ai (電話番号010-000-0000) で、これは C. elegans 線虫に着想を得たニューラルネットワークアーキテクチャを 今すぐ構築するために不可欠です。<|im_end|> +<|im_start|>assistant +{"address": [], "company_name": [], "email_address": ["celegans@liquid.ai"], "human_name": ["ラミン"], "phone_number": ["010-000-0000"]}<|im_end|> +``` + +You can automatically apply it using the dedicated [`.apply_chat_template()`](https://huggingface.co/docs/transformers/en/chat_templating#applychattemplate) function from Hugging Face transformers. + +> [!WARNING] +> ⚠️ The model is intended for single turn conversations. + +**Output format** + +The model outputs a JSON object containing the fields it was prompted to extract. +If no entities are found in a particular category, it returns an empty list for that category. +If entities are found, they are returned as a list for each prompted category. +The model is trained to output entities exactly as they appear in the text. +If the same entity appears multiple times with slight formatting variations, the model outputs all variations to ensure subsequent masking can be performed using exact matches. + + +## 🏃 How to run LFM2 + +- Huggingface: [LFM2-350M](https://huggingface.co/LiquidAI/LFM2-350M) +- llama.cpp: [LFM2-350M-PII-Extract-JP-GGUF](https://huggingface.co/LiquidAI/LFM2-350M-PII-Extract-JP-GGUF) +- LEAP: [LEAP model library](https://leap.liquid.ai/models?model=lfm2-350m-pii-extract-jp) + +You can use the following Colab notebooks for easy inference and fine-tuning: + +| Notebook | Description | Link | +|-------|------|------| +| Inference | Run the model with Hugging Face's transformers library. | Colab link | +| SFT (TRL) | Supervised Fine-Tuning (SFT) notebook with a LoRA adapter using TRL. | Colab link | +| DPO (TRL) | Preference alignment with Direct Preference Optimization (DPO) using TRL. | Colab link | +| SFT (Axolotl) | Supervised Fine-Tuning (SFT) notebook with a LoRA adapter using Axolotl. | Colab link | +| SFT (Unsloth) | Supervised Fine-Tuning (SFT) notebook with a LoRA adapter using Unsloth. | Colab link | + +## 📬 Contact + +- Got questions or want to connect? [Join our Discord community](https://discord.com/invite/liquid-ai) +- If you are interested in custom solutions with edge deployment, please contact [our sales team](https://www.liquid.ai/contact). + +## Citation + +``` +@article{liquidai2025lfm2, + title={LFM2 Technical Report}, + author={Liquid AI}, + journal={arXiv preprint arXiv:2511.23404}, + year={2025} +} +``` + +# LFM2-350M-PII-Extract-JP (日本語) + +[**LFM2-350M**](https://huggingface.co/LiquidAI/LFM2-350M) をベースにしたこのチェックポイントは、**日本語文書から個人を特定できる情報(PII)を抽出し、JSON 形式で出力します**。 +契約書、電子メール、個人の医療報告書、並びに保険請求書などの機密情報を、デバイス上で直接マスキングできます。 + +特に以下の情報を抽出するように訓練されています。 +* 住所/所在地(JSON key: `address`) +* 企業/研究機関/組織名(JSON key: `company_name`) +* メールアドレス(JSON key: `email_address`) +* 人名(JSON key: `human_name`) +* 電話番号(JSON key: `phone_number`) + +これらの情報を日本語の文書から抽出します。 + +--- + +### デモ + + + +--- + +## 性能 + +[**finepdf**](https://huggingface.co/datasets/HuggingFaceFW/finepdfs) から無作為に抽出した 1,000 サンプルを用いて、GPT5 や 32B パラメータの Qwen3 モデル(思考モードあり)など、複数のモデルとの比較評価を行いました。 +**LFM2-350M-PII-Extract-JP** は、わずか **350M パラメータ** という軽量モデルながら GPT5 と同等レベルの性能を発揮し、クラウドレベルの品質をあなたのデバイス上で実現します! + +![Model Size vs Recall Score](https://cdn-uploads.huggingface.co/production/uploads/65d6b6c1a07ad79084a0d214/ToNbcFk-7Pyr-aReyErEm.png) + +> [!NOTE] +> 📝 LFM2-350M-PII-Extract-JP は、上記カテゴリに対して優れた PII 抽出性能を有しますが、私たちの主な目的は、**コミュニティによって継続的に改良される柔軟な基盤モデルを提供すること**です。 +> このモデルで、誰でもプライバシー重視の高品質なマスキングシステムを容易に構築できます。 +> +> ただし、ベースモデルとして今後さらなる改善の余地があります。特に以下のような専門的な利用用途が想定されます。 +> - 組織固有の識別番号の抽出対応 +> - 生年月日、パスポート番号などの追加カテゴリへの拡張 +> - 特定カテゴリにおける抽出性能のさらなる改善 +> +> これらの課題は、**Liquid AI** および開発者コミュニティによるファインチューニングによって解決できると考えています。 +> LFM2-350M-PII-Extract-JP は完成形ではなく、**実運用ニーズに応じた多様な PII 抽出モデル群を生み出す出発点**であると位置づけています。 + +--- + +## モデル詳細 + +**生成パラメータ**: `temperature=0` の貪欲デコード(greedy decoding)の使用を強く推奨します。 + +**システムプロンプト**: このチェックポイントでは以下のシステムプロンプトが**必須**です: + +``` +Extract
, , , , +``` + +モデルは特定のエンティティのみを抽出するように設定することも可能です。 +例: `Extract ` と設定した場合、人名のみを出力します。 + +> [!WARNING] +> ⚠️ モデルの性能を最大限発揮させるには、上記のように **エンティティカテゴリをアルファベット順** に並べてください。 + +--- + +**チャットテンプレート** +LFM2-PII-Extract-JP は以下のような ChatML 風テンプレートを使用します。 + +``` +<|startoftext|><|im_start|>system +Extract , , , , <|im_end|> +<|im_start|>user +こんにちは、ラミンさんに B200 GPU を 10000 台 至急請求してください。連絡先は celegans@liquid.ai (電話番号010-000-0000) で、これは C. elegans 線虫に着想を得たニューラルネットワークアーキテクチャを 今すぐ構築するために不可欠です。<|im_end|> +<|im_start|>assistant +{“address”: [], “company_name”: [], “email_address”: [“celegans@liquid.ai”], “human_name”: [“ラミン”], “phone_number”: [“010-000-0000”]}<|im_end|> +``` + +このテンプレートは、Hugging Face Transformers の専用関数 [`.apply_chat_template()`](https://huggingface.co/docs/transformers/en/chat_templating#applychattemplate) を使用して自動的に適用できます。 + +> [!WARNING] +> ⚠️ このモデルは **一問一答形式 (単一ターン) の会話** に最適化されています。 + +--- + +**出力形式** + +モデルは、指定されたエンティティを含んだ JSON 形式で出力します。 +各カテゴリに該当するエンティティが見つからない場合は、空のリストを返します。 +該当するエンティティが存在する場合は、そのカテゴリごとに抽出された文字列のリストを返します。 + +モデルは、**テキスト中に現れる形式で正確にエンティティを出力**するように訓練されています。 +同じエンティティが複数回登場し表記に揺れがある場合でも、すべての表記バリエーションを出力し、マスキング時に完全一致で対応できるようになっています。 + +--- + +## 🏃 LFM2 の実行方法 + +- Hugging Face: [LFM2-350M](https://huggingface.co/LiquidAI/LFM2-350M) +- llama.cpp: [LFM2-350M-PII-Extract-JP-GGUF](https://huggingface.co/LiquidAI/LFM2-350M-PII-Extract-JP-GGUF) +- LEAP: [LEAP モデルライブラリ](https://leap.liquid.ai/models?model=lfm2-350m-pii-extract-jp) + +| ノートブック | 説明 | リンク | +|-------|------|------| +| 推論 | Hugging Faceのtransformersライブラリを使用してモデルを実行します。 | Colab link | +| SFT (TRL) | TRLを使用したLoRAアダプターによる教師あり学習(SFT)を行います。 | Colab link | +| DPO (TRL) | TRLを使用したDPOによる選好アライメントを行います。 | Colab link | +| SFT (Axolotl) | Axolotlを使用したLoRAアダプターによる教師あり学習(SFT)を行います。 | Colab link | +| SFT (Unsloth) | Unslothを使用したLoRAアダプターによる教師あり学習(SFT)を行います。 | Colab link | + +--- + +## 📬 お問い合わせ + +エッジ環境への導入を含むカスタムソリューションにご興味がある方は、[営業チーム](https://www.liquid.ai/ja/contact)までお問い合わせください。 + +## 引用 + +``` +@article{liquidai2025lfm2, + title={LFM2 Technical Report}, + author={Liquid AI}, + journal={arXiv preprint arXiv:2511.23404}, + year={2025} +} +``` \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..224b2d6 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,50 @@ +{{- bos_token -}} +{%- set ns = namespace(system_prompt="") -%} + +{# --- 1. Extract system prompt if provided --- #} +{%- if messages[0]["role"] == "system" -%} + {%- set ns.system_prompt = messages[0]["content"] -%} + {%- set messages = messages[1:] -%} +{%- else -%} + {# --- 2. Default system prompt if none provided --- #} + {%- set ns.system_prompt = "Extract
, , , , " -%} +{%- endif -%} + +{# --- 3. Add tool list if any --- #} +{%- if tools -%} + {%- set ns.system_prompt = ns.system_prompt + (" +" if ns.system_prompt else "") + "List of tools: <|tool_list_start|>[" -%} + {%- for tool in tools -%} + {%- if tool is not string -%} + {%- set tool = tool | tojson -%} + {%- endif -%} + {%- set ns.system_prompt = ns.system_prompt + tool -%} + {%- if not loop.last -%} + {%- set ns.system_prompt = ns.system_prompt + ", " -%} + {%- endif -%} + {%- endfor -%} + {%- set ns.system_prompt = ns.system_prompt + "]<|tool_list_end|>" -%} +{%- endif -%} + +{# --- 4. Render system prompt --- #} +{%- if ns.system_prompt -%} + {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}} +{%- endif -%} + +{# --- 5. Render all conversation messages --- #} +{%- for message in messages -%} + {{- "<|im_start|>" + message["role"] + "\n" -}} + {%- set content = message["content"] -%} + {%- if content is not string -%} + {%- set content = content | tojson -%} + {%- endif -%} + {%- if message["role"] == "tool" -%} + {%- set content = "<|tool_response_start|>" + content + "<|tool_response_end|>" -%} + {%- endif -%} + {{- content + "<|im_end|>\n" -}} +{%- endfor -%} + +{# --- 6. Append generation prompt for assistant --- #} +{%- if add_generation_prompt -%} + {{- "<|im_start|>assistant\n" -}} +{%- endif -%} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..24d37df --- /dev/null +++ b/config.json @@ -0,0 +1,59 @@ +{ + "architectures": [ + "Lfm2ForCausalLM" + ], + "block_auto_adjust_ff_dim": true, + "block_dim": 1024, + "block_ff_dim": 6656, + "block_ffn_dim_multiplier": 1.0, + "block_mlp_init_scale": 1.0, + "block_multiple_of": 256, + "block_norm_eps": 1e-05, + "block_out_init_scale": 1.0, + "block_use_swiglu": true, + "block_use_xavier_init": true, + "bos_token_id": 1, + "conv_L_cache": 3, + "conv_bias": false, + "conv_dim": 1024, + "conv_dim_out": 1024, + "conv_use_xavier_init": true, + "eos_token_id": 7, + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 6656, + "layer_types": [ + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv", + "full_attention", + "conv" + ], + "max_position_embeddings": 128000, + "model_type": "lfm2", + "norm_eps": 1e-05, + "num_attention_heads": 16, + "num_heads": 16, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pad_token_id": 0, + "rope_theta": 1000000.0, + "theta": 1000000.0, + "tie_embedding": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.55.0.dev0", + "use_cache": true, + "use_pos_enc": true, + "vocab_size": 65536 +} diff --git a/configuration.json b/configuration.json new file mode 100644 index 0000000..bbeeda1 --- /dev/null +++ b/configuration.json @@ -0,0 +1 @@ +{"framework": "pytorch", "task": "text-generation", "allow_remote": true} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..c55d106 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 7, + "pad_token_id": 0, + "transformers_version": "4.55.0.dev0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..2163844 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fda23dc8ed57eaa5679272b8e664dd5324ba60734002f0d72cb9e2561be2e7c5 +size 708984464 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..b28c8a1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|startoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..971e6d7 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f0c8a6a1a96dd707d2efd06d815aa86123aaaad8328dda13188fcad6e25592 +size 4732406 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..39a5114 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,4078 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "<|pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "<|startoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "<|endoftext|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "<|fim_pre|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "4": { + "content": "<|fim_mid|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "5": { + "content": "<|fim_suf|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "6": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "7": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "8": { + "content": "<|tool_list_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "9": { + "content": "<|tool_list_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "10": { + "content": "<|tool_call_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "11": { + "content": "<|tool_call_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "12": { + "content": "<|tool_response_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "13": { + "content": "<|tool_response_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "16": { + "content": "<|reserved_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "17": { + "content": "<|reserved_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "18": { + "content": "<|reserved_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "19": { + "content": "<|reserved_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "20": { + "content": "<|reserved_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "21": { + "content": "<|reserved_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "22": { + "content": "<|reserved_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "23": { + "content": "<|reserved_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "24": { + "content": "<|reserved_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "25": { + "content": "<|reserved_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "26": { + "content": "<|reserved_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "27": { + "content": "<|reserved_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "28": { + "content": "<|reserved_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "29": { + "content": "<|reserved_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "30": { + "content": "<|reserved_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "31": { + "content": "<|reserved_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32": { + "content": "<|reserved_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "33": { + "content": "<|reserved_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "34": { + "content": "<|reserved_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "35": { + "content": "<|reserved_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "36": { + "content": "<|reserved_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "37": { + "content": "<|reserved_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "38": { + "content": "<|reserved_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "39": { + "content": "<|reserved_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "40": { + "content": "<|reserved_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "41": { + "content": "<|reserved_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "42": { + "content": "<|reserved_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "43": { + "content": "<|reserved_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "44": { + "content": "<|reserved_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "45": { + "content": "<|reserved_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "46": { + "content": "<|reserved_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "47": { + "content": "<|reserved_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "48": { + "content": "<|reserved_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "49": { + "content": "<|reserved_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "50": { + "content": "<|reserved_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "51": { + "content": "<|reserved_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "52": { + "content": "<|reserved_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "53": { + "content": "<|reserved_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "54": { + "content": "<|reserved_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "55": { + "content": "<|reserved_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "56": { + "content": "<|reserved_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "57": { + "content": "<|reserved_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "58": { + "content": "<|reserved_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "59": { + "content": "<|reserved_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "60": { + "content": "<|reserved_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "61": { + "content": "<|reserved_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "62": { + "content": "<|reserved_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "63": { + "content": "<|reserved_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64": { + "content": "<|reserved_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "65": { + "content": "<|reserved_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "66": { + "content": "<|reserved_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "67": { + "content": "<|reserved_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "68": { + "content": "<|reserved_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "69": { + "content": "<|reserved_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "70": { + "content": "<|reserved_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "71": { + "content": "<|reserved_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "72": { + "content": "<|reserved_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "73": { + "content": "<|reserved_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "74": { + "content": "<|reserved_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "75": { + "content": "<|reserved_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "76": { + "content": "<|reserved_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "77": { + "content": "<|reserved_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "78": { + "content": "<|reserved_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "79": { + "content": "<|reserved_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "80": { + "content": "<|reserved_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "81": { + "content": "<|reserved_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "82": { + "content": "<|reserved_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "83": { + "content": "<|reserved_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "84": { + "content": "<|reserved_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "85": { + "content": "<|reserved_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "86": { + "content": "<|reserved_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "87": { + "content": "<|reserved_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "88": { + "content": "<|reserved_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "89": { + "content": "<|reserved_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "90": { + "content": "<|reserved_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "91": { + "content": "<|reserved_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "92": { + "content": "<|reserved_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "93": { + "content": "<|reserved_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "94": { + "content": "<|reserved_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "95": { + "content": "<|reserved_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "96": { + "content": "<|reserved_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "97": { + "content": "<|reserved_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "98": { + "content": "<|reserved_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "99": { + "content": "<|reserved_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "100": { + "content": "<|reserved_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "101": { + "content": "<|reserved_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "102": { + "content": "<|reserved_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "103": { + "content": "<|reserved_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "104": { + "content": "<|reserved_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "105": { + "content": "<|reserved_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "106": { + "content": "<|reserved_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "107": { + "content": "<|reserved_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "108": { + "content": "<|reserved_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "109": { + "content": "<|reserved_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "110": { + "content": "<|reserved_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "111": { + "content": "<|reserved_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "112": { + "content": "<|reserved_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "113": { + "content": "<|reserved_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "114": { + "content": "<|reserved_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "115": { + "content": "<|reserved_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "116": { + "content": "<|reserved_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "117": { + "content": "<|reserved_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "118": { + "content": "<|reserved_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "119": { + "content": "<|reserved_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "120": { + "content": "<|reserved_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "121": { + "content": "<|reserved_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "122": { + "content": "<|reserved_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "123": { + "content": "<|reserved_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "124": { + "content": "<|reserved_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "125": { + "content": "<|reserved_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "126": { + "content": "<|reserved_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "127": { + "content": "<|reserved_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128": { + "content": "<|audio_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "129": { + "content": "<|text_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "130": { + "content": "<|text_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "131": { + "content": "<|mixed_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "132": { + "content": "<|mixed_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "133": { + "content": "<|reserved_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "134": { + "content": "<|reserved_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "135": { + "content": "<|reserved_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "136": { + "content": "<|reserved_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "137": { + "content": "<|reserved_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "138": { + "content": "<|reserved_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "139": { + "content": "<|reserved_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "140": { + "content": "<|reserved_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "141": { + "content": "<|reserved_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "142": { + "content": "<|reserved_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "143": { + "content": "<|reserved_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "144": { + "content": "<|reserved_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "145": { + "content": "<|reserved_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "146": { + "content": "<|reserved_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "147": { + "content": "<|reserved_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "148": { + "content": "<|reserved_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "149": { + "content": "<|reserved_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "150": { + "content": "<|reserved_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151": { + "content": "<|reserved_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "152": { + "content": "<|reserved_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "153": { + "content": "<|reserved_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "154": { + "content": "<|reserved_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "155": { + "content": "<|reserved_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "156": { + "content": "<|reserved_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "157": { + "content": "<|reserved_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "158": { + "content": "<|reserved_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "159": { + "content": "<|reserved_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "160": { + "content": "<|reserved_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "161": { + "content": "<|reserved_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "162": { + "content": "<|reserved_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "163": { + "content": "<|reserved_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "164": { + "content": "<|reserved_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "165": { + "content": "<|reserved_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "166": { + "content": "<|reserved_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "167": { + "content": "<|reserved_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "168": { + "content": "<|reserved_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "169": { + "content": "<|reserved_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "170": { + "content": "<|reserved_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "171": { + "content": "<|reserved_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "172": { + "content": "<|reserved_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "173": { + "content": "<|reserved_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "174": { + "content": "<|reserved_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "175": { + "content": "<|reserved_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "176": { + "content": "<|reserved_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "177": { + "content": "<|reserved_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "178": { + "content": "<|reserved_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "179": { + "content": "<|reserved_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "180": { + "content": "<|reserved_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "181": { + "content": "<|reserved_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "182": { + "content": "<|reserved_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "183": { + "content": "<|reserved_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "184": { + "content": "<|reserved_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "185": { + "content": "<|reserved_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "186": { + "content": "<|reserved_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "187": { + "content": "<|reserved_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "188": { + "content": "<|reserved_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "189": { + "content": "<|reserved_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "190": { + "content": "<|reserved_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "191": { + "content": "<|reserved_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "192": { + "content": "<|reserved_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "193": { + "content": "<|reserved_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "194": { + "content": "<|reserved_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "195": { + "content": "<|reserved_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "196": { + "content": "<|reserved_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "197": { + "content": "<|reserved_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "198": { + "content": "<|reserved_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "199": { + "content": "<|reserved_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "200": { + "content": "<|reserved_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "201": { + "content": "<|reserved_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "202": { + "content": "<|reserved_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "203": { + "content": "<|reserved_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "204": { + "content": "<|reserved_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "205": { + "content": "<|reserved_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "206": { + "content": "<|reserved_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "207": { + "content": "<|reserved_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "208": { + "content": "<|reserved_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "209": { + "content": "<|reserved_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "210": { + "content": "<|reserved_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "211": { + "content": "<|reserved_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "212": { + "content": "<|reserved_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "213": { + "content": "<|reserved_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "214": { + "content": "<|reserved_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "215": { + "content": "<|reserved_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "216": { + "content": "<|reserved_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "217": { + "content": "<|reserved_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "218": { + "content": "<|reserved_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "219": { + "content": "<|reserved_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "220": { + "content": "<|reserved_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "221": { + "content": "<|reserved_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "222": { + "content": "<|reserved_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "223": { + "content": "<|reserved_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "224": { + "content": "<|reserved_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "225": { + "content": "<|reserved_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "226": { + "content": "<|reserved_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "227": { + "content": "<|reserved_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "228": { + "content": "<|reserved_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "229": { + "content": "<|reserved_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "230": { + "content": "<|reserved_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "231": { + "content": "<|reserved_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "232": { + "content": "<|reserved_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "233": { + "content": "<|reserved_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "234": { + "content": "<|reserved_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "235": { + "content": "<|reserved_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "236": { + "content": "<|reserved_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "237": { + "content": "<|reserved_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "238": { + "content": "<|reserved_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "239": { + "content": "<|reserved_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "240": { + "content": "<|reserved_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "241": { + "content": "<|reserved_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "242": { + "content": "<|reserved_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "243": { + "content": "<|reserved_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "244": { + "content": "<|reserved_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "245": { + "content": "<|reserved_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "246": { + "content": "<|reserved_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "247": { + "content": "<|reserved_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "248": { + "content": "<|reserved_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "249": { + "content": "<|reserved_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250": { + "content": "<|reserved_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "251": { + "content": "<|reserved_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "252": { + "content": "<|reserved_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "253": { + "content": "<|reserved_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "254": { + "content": "<|reserved_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "255": { + "content": "<|reserved_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "256": { + "content": "<|reserved_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "257": { + "content": "<|reserved_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "258": { + "content": "<|reserved_248|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "259": { + "content": "<|reserved_249|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "260": { + "content": "<|reserved_250|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "261": { + "content": "<|reserved_251|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "262": { + "content": "<|reserved_252|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "263": { + "content": "<|reserved_253|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "264": { + "content": "<|reserved_254|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "265": { + "content": "<|reserved_255|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "266": { + "content": "<|reserved_256|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "267": { + "content": "<|reserved_257|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "268": { + "content": "<|reserved_258|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "269": { + "content": "<|reserved_259|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "270": { + "content": "<|reserved_260|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "271": { + "content": "<|reserved_261|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "272": { + "content": "<|reserved_262|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "273": { + "content": "<|reserved_263|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "274": { + "content": "<|reserved_264|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "275": { + "content": "<|reserved_265|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "276": { + "content": "<|reserved_266|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "277": { + "content": "<|reserved_267|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "278": { + "content": "<|reserved_268|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "279": { + "content": "<|reserved_269|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "280": { + "content": "<|reserved_270|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "281": { + "content": "<|reserved_271|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "282": { + "content": "<|reserved_272|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "283": { + "content": "<|reserved_273|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "284": { + "content": "<|reserved_274|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "285": { + "content": "<|reserved_275|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "286": { + "content": "<|reserved_276|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "287": { + "content": "<|reserved_277|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "288": { + "content": "<|reserved_278|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "289": { + "content": "<|reserved_279|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "290": { + "content": "<|reserved_280|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "291": { + "content": "<|reserved_281|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "292": { + "content": "<|reserved_282|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "293": { + "content": "<|reserved_283|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "294": { + "content": "<|reserved_284|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "295": { + "content": "<|reserved_285|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "296": { + "content": "<|reserved_286|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "297": { + "content": "<|reserved_287|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "298": { + "content": "<|reserved_288|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "299": { + "content": "<|reserved_289|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "300": { + "content": "<|reserved_290|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "301": { + "content": "<|reserved_291|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "302": { + "content": "<|reserved_292|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "303": { + "content": "<|reserved_293|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "304": { + "content": "<|reserved_294|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "305": { + "content": "<|reserved_295|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "306": { + "content": "<|reserved_296|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "307": { + "content": "<|reserved_297|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "308": { + "content": "<|reserved_298|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "309": { + "content": "<|reserved_299|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "310": { + "content": "<|reserved_300|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "311": { + "content": "<|reserved_301|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "312": { + "content": "<|reserved_302|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "313": { + "content": "<|reserved_303|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "314": { + "content": "<|reserved_304|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "315": { + "content": "<|reserved_305|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "316": { + "content": "<|reserved_306|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "317": { + "content": "<|reserved_307|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "318": { + "content": "<|reserved_308|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "319": { + "content": "<|reserved_309|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "320": { + "content": "<|reserved_310|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "321": { + "content": "<|reserved_311|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "322": { + "content": "<|reserved_312|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "323": { + "content": "<|reserved_313|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "324": { + "content": "<|reserved_314|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "325": { + "content": "<|reserved_315|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "326": { + "content": "<|reserved_316|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "327": { + "content": "<|reserved_317|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "328": { + "content": "<|reserved_318|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "329": { + "content": "<|reserved_319|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "330": { + "content": "<|reserved_320|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "331": { + "content": "<|reserved_321|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "332": { + "content": "<|reserved_322|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "333": { + "content": "<|reserved_323|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "334": { + "content": "<|reserved_324|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "335": { + "content": "<|reserved_325|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "336": { + "content": "<|reserved_326|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "337": { + "content": "<|reserved_327|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "338": { + "content": "<|reserved_328|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "339": { + "content": "<|reserved_329|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "340": { + "content": "<|reserved_330|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "341": { + "content": "<|reserved_331|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "342": { + "content": "<|reserved_332|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "343": { + "content": "<|reserved_333|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "344": { + "content": "<|reserved_334|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "345": { + "content": "<|reserved_335|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "346": { + "content": "<|reserved_336|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "347": { + "content": "<|reserved_337|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "348": { + "content": "<|reserved_338|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "349": { + "content": "<|reserved_339|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "350": { + "content": "<|reserved_340|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "351": { + "content": "<|reserved_341|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "352": { + "content": "<|reserved_342|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "353": { + "content": "<|reserved_343|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "354": { + "content": "<|reserved_344|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "355": { + "content": "<|reserved_345|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "356": { + "content": "<|reserved_346|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "357": { + "content": "<|reserved_347|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "358": { + "content": "<|reserved_348|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "359": { + "content": "<|reserved_349|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "360": { + "content": "<|reserved_350|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "361": { + "content": "<|reserved_351|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "362": { + "content": "<|reserved_352|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "363": { + "content": "<|reserved_353|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "364": { + "content": "<|reserved_354|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "365": { + "content": "<|reserved_355|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "366": { + "content": "<|reserved_356|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "367": { + "content": "<|reserved_357|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "368": { + "content": "<|reserved_358|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "369": { + "content": "<|reserved_359|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "370": { + "content": "<|reserved_360|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "371": { + "content": "<|reserved_361|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "372": { + "content": "<|reserved_362|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "373": { + "content": "<|reserved_363|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "374": { + "content": "<|reserved_364|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "375": { + "content": "<|reserved_365|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "376": { + "content": "<|reserved_366|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "377": { + "content": "<|reserved_367|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "378": { + "content": "<|reserved_368|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "379": { + "content": "<|reserved_369|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "380": { + "content": "<|reserved_370|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "381": { + "content": "<|reserved_371|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "382": { + "content": "<|reserved_372|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "383": { + "content": "<|reserved_373|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "384": { + "content": "<|reserved_374|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "385": { + "content": "<|reserved_375|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "386": { + "content": "<|reserved_376|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "387": { + "content": "<|reserved_377|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "388": { + "content": "<|reserved_378|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "389": { + "content": "<|reserved_379|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "390": { + "content": "<|reserved_380|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "391": { + "content": "<|reserved_381|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "392": { + "content": "<|reserved_382|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "393": { + "content": "<|reserved_383|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "394": { + "content": "<|reserved_384|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "395": { + "content": "<|reserved_385|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "396": { + "content": "<|reserved_386|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "397": { + "content": "<|reserved_387|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "398": { + "content": "<|reserved_388|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "399": { + "content": "<|reserved_389|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "400": { + "content": "<|reserved_390|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "401": { + "content": "<|reserved_391|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "402": { + "content": "<|reserved_392|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "403": { + "content": "<|reserved_393|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "404": { + "content": "<|reserved_394|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "405": { + "content": "<|reserved_395|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "406": { + "content": "<|reserved_396|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "407": { + "content": "<|reserved_397|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "408": { + "content": "<|reserved_398|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "409": { + "content": "<|reserved_399|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "410": { + "content": "<|reserved_400|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "411": { + "content": "<|reserved_401|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "412": { + "content": "<|reserved_402|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "413": { + "content": "<|reserved_403|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "414": { + "content": "<|reserved_404|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "415": { + "content": "<|reserved_405|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "416": { + "content": "<|reserved_406|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "417": { + "content": "<|reserved_407|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "418": { + "content": "<|reserved_408|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "419": { + "content": "<|reserved_409|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "420": { + "content": "<|reserved_410|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "421": { + "content": "<|reserved_411|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "422": { + "content": "<|reserved_412|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "423": { + "content": "<|reserved_413|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "424": { + "content": "<|reserved_414|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "425": { + "content": "<|reserved_415|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "426": { + "content": "<|reserved_416|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "427": { + "content": "<|reserved_417|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "428": { + "content": "<|reserved_418|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "429": { + "content": "<|reserved_419|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "430": { + "content": "<|reserved_420|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "431": { + "content": "<|reserved_421|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "432": { + "content": "<|reserved_422|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "433": { + "content": "<|reserved_423|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "434": { + "content": "<|reserved_424|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "435": { + "content": "<|reserved_425|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "436": { + "content": "<|reserved_426|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "437": { + "content": "<|reserved_427|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "438": { + "content": "<|reserved_428|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "439": { + "content": "<|reserved_429|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "440": { + "content": "<|reserved_430|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "441": { + "content": "<|reserved_431|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "442": { + "content": "<|reserved_432|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "443": { + "content": "<|reserved_433|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "444": { + "content": "<|reserved_434|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "445": { + "content": "<|reserved_435|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "446": { + "content": "<|reserved_436|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "447": { + "content": "<|reserved_437|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "448": { + "content": "<|reserved_438|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "449": { + "content": "<|reserved_439|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "450": { + "content": "<|reserved_440|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "451": { + "content": "<|reserved_441|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "452": { + "content": "<|reserved_442|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "453": { + "content": "<|reserved_443|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "454": { + "content": "<|reserved_444|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "455": { + "content": "<|reserved_445|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "456": { + "content": "<|reserved_446|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "457": { + "content": "<|reserved_447|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "458": { + "content": "<|reserved_448|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "459": { + "content": "<|reserved_449|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "460": { + "content": "<|reserved_450|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "461": { + "content": "<|reserved_451|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "462": { + "content": "<|reserved_452|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "463": { + "content": "<|reserved_453|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "464": { + "content": "<|reserved_454|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "465": { + "content": "<|reserved_455|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "466": { + "content": "<|reserved_456|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "467": { + "content": "<|reserved_457|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "468": { + "content": "<|reserved_458|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "469": { + "content": "<|reserved_459|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "470": { + "content": "<|reserved_460|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "471": { + "content": "<|reserved_461|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "472": { + "content": "<|reserved_462|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "473": { + "content": "<|reserved_463|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "474": { + "content": "<|reserved_464|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "475": { + "content": "<|reserved_465|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "476": { + "content": "<|reserved_466|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "477": { + "content": "<|reserved_467|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "478": { + "content": "<|reserved_468|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "479": { + "content": "<|reserved_469|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "480": { + "content": "<|reserved_470|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "481": { + "content": "<|reserved_471|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "482": { + "content": "<|reserved_472|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "483": { + "content": "<|reserved_473|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "484": { + "content": "<|reserved_474|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "485": { + "content": "<|reserved_475|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "486": { + "content": "<|reserved_476|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "487": { + "content": "<|reserved_477|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "488": { + "content": "<|reserved_478|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "489": { + "content": "<|reserved_479|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "490": { + "content": "<|reserved_480|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "491": { + "content": "<|reserved_481|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "492": { + "content": "<|reserved_482|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "493": { + "content": "<|reserved_483|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "494": { + "content": "<|reserved_484|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "495": { + "content": "<|reserved_485|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "496": { + "content": "<|reserved_486|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "497": { + "content": "<|reserved_487|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "498": { + "content": "<|reserved_488|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "499": { + "content": "<|reserved_489|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "500": { + "content": "<|reserved_490|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64011": { + "content": "Mathias", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "64014": { + "content": "python", + "lstrip": false, + "normalized": true, + "rstrip": false, + "single_word": false, + "special": false + }, + "64394": { + "content": "<|cot_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64395": { + "content": "<|cot_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64396": { + "content": "<|review_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64397": { + "content": "<|review_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64398": { + "content": "<|file_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "64399": { + "content": "<|file_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "<|startoftext|>", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "extra_special_tokens": {}, + "legacy": false, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 1000000000000000019884624838656, + "pad_token": "<|pad|>", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizerFast", + "use_default_system_prompt": false, + "use_fast": true +}