初始化项目,由ModelHub XC社区提供模型
Model: reaperdoesntknow/SMOLM2Prover Source: Original Platform
This commit is contained in:
35
.gitattributes
vendored
Normal file
35
.gitattributes
vendored
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
200
README.md
Normal file
200
README.md
Normal file
@@ -0,0 +1,200 @@
|
|||||||
|
---
|
||||||
|
library_name: transformers
|
||||||
|
model_name: SmolLM2Prover
|
||||||
|
tags:
|
||||||
|
- text-generation
|
||||||
|
- proof
|
||||||
|
- cot
|
||||||
|
- reasoning
|
||||||
|
- math
|
||||||
|
- calculus
|
||||||
|
- logic
|
||||||
|
- sft
|
||||||
|
- trl
|
||||||
|
- generated_from_trainer
|
||||||
|
- finetune
|
||||||
|
- symbioticai
|
||||||
|
- convergentintel
|
||||||
|
language:
|
||||||
|
- en
|
||||||
|
license: apache-2.0
|
||||||
|
datasets:
|
||||||
|
- AI-MO/NuminaMath-1.5
|
||||||
|
base_model:
|
||||||
|
- prithivMLmods/SmolLM2-CoT-360M
|
||||||
|
pipeline_tag: text-generation
|
||||||
|
---
|
||||||
|
|
||||||
|
# Model Card for SmolLM2Prover
|
||||||
|
|
||||||
|
**SmolLM2Prover** is a specialized, fine-tuned version of [prithivMLmods/SmolLM2-CoT-360M](https://huggingface.co/prithivMLmods/SmolLM2-CoT-360M). While retaining the strong conversational abilities of its base model, this version has been specifically enhanced to excel at deep thinking, logical reasoning, and higher-level mathematics, with a focus on generating step-by-step proofs and explanations (Chain-of-Thought).
|
||||||
|
|
||||||
|
The model was fine-tuned using multiple rounds of Supervised Fine-Tuning (SFT) with the [TRL](https://github.com/huggingface/trl) library on a curated dataset, enhancing its ability to follow complex instructions and reason through problems.
|
||||||
|
|
||||||
|
## Model Details
|
||||||
|
|
||||||
|
* **Base Model:** [prithivMLmods/SmolLM2-CoT-360M](https://huggingface.co/prithivMLmods/SmolLM2-CoT-360M)
|
||||||
|
* **Fine-tuning Library:** TRL (Transformer Reinforcement Learning)
|
||||||
|
* **Specialization:** Mathematical reasoning, proof generation, Chain-of-Thought (CoT)
|
||||||
|
* **Training Data:** Fine-tuned on `AI-MO/NuminaMath-1.5` and an additional ~1 million tokens of custom-formatted reasoning data.
|
||||||
|
|
||||||
|
## How to Use
|
||||||
|
|
||||||
|
This model is intended to be used for text generation tasks that require logical reasoning or advanced conversation.
|
||||||
|
|
||||||
|
### Using the Pipeline
|
||||||
|
|
||||||
|
The easiest way to use the model is with the `transformers` pipeline.
|
||||||
|
|
||||||
|
```python
|
||||||
|
from transformers import pipeline
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
model_id = "reaperdoesntknow/SMOLM2Prover"
|
||||||
|
prompt = "Prove that the derivative of f(x) = x^2 is f'(x) = 2x using the limit definition of a derivative."
|
||||||
|
|
||||||
|
generator = pipeline(
|
||||||
|
"text-generation",
|
||||||
|
model=model_id,
|
||||||
|
torch_dtype=torch.bfloat16, # Or torch.float16 if bfloat16 is not available
|
||||||
|
device_map="auto"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Using a chat format for better instruction following
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": f"You are a helpful math assistant. Please solve the following problem step-by-step.\n\n{prompt}"}
|
||||||
|
]
|
||||||
|
|
||||||
|
output = generator(messages, max_new_tokens=512, return_full_text=False)
|
||||||
|
print(output[0]["generated_text"])
```

### Manual Usage

For more control, you can use `AutoModelForCausalLM` and `AutoTokenizer` directly.

```python
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||||
|
import torch
|
||||||
|
|
||||||
|
|
||||||
|
model_id = "reaperdoesntknow/SMOLM2Prover"
|
||||||
|
prompt = "Prove that the derivative of f(x) = x^2 is f'(x) = 2x using the limit definition of a derivative."
|
||||||
|
|
||||||
|
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||||
|
model = AutoModelForCausalLM.from_pretrained(
|
||||||
|
model_id,
|
||||||
|
torch_dtype=torch.bfloat16, # Or torch.float16
|
||||||
|
device_map="auto"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Apply the chat template for proper formatting
|
||||||
|
messages = [
|
||||||
|
{"role": "user", "content": f"You are a helpful math assistant. Please solve the following problem step-by-step.\n\n{prompt}"}
|
||||||
|
]
|
||||||
|
tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
|
||||||
|
|
||||||
|
outputs = model.generate(tokenized_chat, max_new_tokens=512)
|
||||||
|
decoded_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
|
||||||
|
# Print only the generated part
|
||||||
|
print(decoded_output.split("assistant\n")[-1])
|
||||||
|
```
|
||||||
|
### Training
|
||||||
|
|
||||||
|
The model underwent several rounds of Supervised Fine-Tuning (SFT) using TRL's SFTTrainer.
|
||||||
|
* Training Data: The primary dataset used was AI-MO/NuminaMath-1.5, augmented with approximately 1 million additional tokens. This data was formatted with a specific prompt structure designed to elicit step-by-step, chain-of-thought reasoning from the model.
|
||||||
|
* Process: The iterative SFT approach allowed for progressive refinement of the model's reasoning capabilities.
|
||||||
|
|
||||||
|
## Framework Versions
|
||||||
|
* Transformers: 4.56.0
|
||||||
|
* Pytorch: 2.8.0+cu126
|
||||||
|
* TRL: 0.22.2
|
||||||
|
* Datasets: 4.0.0
|
||||||
|
* Tokenizers: 0.22.0
|
||||||
|
|
||||||
|
### Intended Use
|
||||||
|
This model is a versatile tool suitable for a range of applications, from everyday conversation to complex problem-solving.
|
||||||
|
* Primary Use Cases (Specialized Skills):
|
||||||
|
* Educational tools for higher-level mathematics and logic.
|
||||||
|
* Automated proof generation and verification.
|
||||||
|
* Step-by-step problem-solving assistants for complex topics.
|
||||||
|
* Serving as a "thinking" component for applications requiring deep reasoning.
|
||||||
|
* General Use Cases:
|
||||||
|
* General-purpose conversation and advanced chatbot applications.
|
||||||
|
* Complex instruction-following tasks.
|
||||||
|
* Content generation that requires logical consistency.
|
||||||
|
### Limitations and Bias
|
||||||
|
* Mathematical Accuracy: While highly capable, the model can still make errors or "hallucinate" incorrect steps or solutions in complex mathematical proofs. All outputs, especially for critical applications, should be verified by a human expert.
|
||||||
|
* Domain Performance: The model's performance is most reliable on problems similar to its training data. While it is designed to handle higher levels of math and deep thinking, its accuracy in novel or esoteric domains should be carefully evaluated.
|
||||||
|
* Inherited Bias: This model inherits any biases present in the base model (SmolLM2-CoT-360M) and the training datasets.
|
||||||
|
### Acknowledgements
|
||||||
|
You're doing great!
|
||||||
|
## Discrepancy Calculus Foundation
|
||||||
|
|
||||||
|
This model is part of the [Convergent Intelligence LLC: Research Division](https://huggingface.co/reaperdoesntknow) portfolio. All models in this portfolio are developed under the Discrepancy Calculus (DISC) framework — a measure-theoretic approach to understanding and controlling the gap between what a model *should* produce and what it *actually* produces.
|
||||||
|
|
||||||
|
DISC treats training singularities (loss plateaus, mode collapse, catastrophic forgetting) not as failures to be smoothed over, but as **structural signals** that reveal the geometry of the learning problem. Key concepts:
|
||||||
|
|
||||||
|
- **Discrepancy Operator (D):** Measures the gap between expected and observed behavior at each training step
|
||||||
|
- **Jump Sets:** Boundaries where model behavior changes discontinuously — these are *features*, not bugs
|
||||||
|
- **Ghost Imprinting:** Teacher knowledge that transfers to student models through weight-space topology rather than explicit distillation signal
|
||||||
|
|
||||||
|
For the full mathematical treatment, see [Discrepancy Calculus: Foundations and Core Theory](https://huggingface.co/reaperdoesntknow/Discrepancy_Calculus) (DOI: 10.57967/hf/8194).
|
||||||
|
|
||||||
|
**Citation chain:** [Structure Over Scale](https://huggingface.co/reaperdoesntknow/Structure-Over-Scale) (DOI: 10.57967/hf/8165) → [Three Teachers to Dual Cognition](https://huggingface.co/reaperdoesntknow/DualMind_Methodolgy) (DOI: 10.57967/hf/8184) → [Discrepancy Calculus](https://huggingface.co/reaperdoesntknow/Discrepancy_Calculus) (DOI: 10.57967/hf/8194)
|
||||||
|
|
||||||
|
## Citations
|
||||||
|
If you use TRL in your work, please cite the library:
|
||||||
|
@misc{vonwerra2022trl,
|
||||||
|
title = {{TRL: Transformer Reinforcement Learning}},
|
||||||
|
author = {Leandro von Werra and Younes Belkada and Lewis Tunstall and Edward Beeching and Tristan Thrush and Nathan Lambert and Shengyi Huang and Kashif Rasul and Quentin Gallou{\'e}dec},
|
||||||
|
year = 2020,
|
||||||
|
journal = {GitHub repository},
|
||||||
|
publisher = {GitHub},
|
||||||
|
howpublished = {\url{https://github.com/huggingface/trl}}
|
||||||
|
}
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Convergent Intelligence Portfolio
|
||||||
|
|
||||||
|
*Part of the [Standalone Models](https://huggingface.co/reaperdoesntknow) by [Convergent Intelligence LLC: Research Division](https://huggingface.co/reaperdoesntknow)*
|
||||||
|
|
||||||
|
|
||||||
|
### Related Models
|
||||||
|
|
||||||
|
| Model | Downloads | Format |
|
||||||
|
|-------|-----------|--------|
|
||||||
|
| [SMOLM2Prover-GGUF](https://huggingface.co/reaperdoesntknow/SMOLM2Prover-GGUF) | 150 | GGUF |
|
||||||
|
| [DeepReasoning_1R](https://huggingface.co/reaperdoesntknow/DeepReasoning_1R) | 16 | HF |
|
||||||
|
| [SAGI](https://huggingface.co/reaperdoesntknow/SAGI) | 3 | HF |
|
||||||
|
| [S-AGI](https://huggingface.co/reaperdoesntknow/S-AGI) | 0 | HF |
|
||||||
|
|
||||||
|
### Top Models from Our Lab
|
||||||
|
|
||||||
|
| Model | Downloads |
|
||||||
|
|-------|-----------|
|
||||||
|
| [Qwen3-1.7B-Thinking-Distil](https://huggingface.co/reaperdoesntknow/Qwen3-1.7B-Thinking-Distil) | 501 |
|
||||||
|
| [LFM2.5-1.2B-Distilled-SFT](https://huggingface.co/reaperdoesntknow/LFM2.5-1.2B-Distilled-SFT) | 342 |
|
||||||
|
| [Qwen3-1.7B-Coder-Distilled-SFT](https://huggingface.co/reaperdoesntknow/Qwen3-1.7B-Coder-Distilled-SFT) | 302 |
|
||||||
|
| [Qwen3-0.6B-Distilled-30B-A3B-Thinking-SFT-GGUF](https://huggingface.co/reaperdoesntknow/Qwen3-0.6B-Distilled-30B-A3B-Thinking-SFT-GGUF) | 203 |
|
||||||
|
| [Qwen3-1.7B-Coder-Distilled-SFT-GGUF](https://huggingface.co/reaperdoesntknow/Qwen3-1.7B-Coder-Distilled-SFT-GGUF) | 194 |
|
||||||
|
|
||||||
|
**Total Portfolio: 41 models | 2,781 total downloads**
|
||||||
|
|
||||||
|
|
||||||
|
*Last updated: 2026-03-28 12:56 UTC*
|
||||||
|
|
||||||
|
<!-- CIX-CROSSLINK-START -->
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## From the Convergent Intelligence Portfolio
|
||||||
|
|
||||||
|
**[DistilQwen Collection](https://huggingface.co/collections/reaperdoesntknow/distilqwen-69bf40ec669117e3f069ef1c)** — Our only BF16 series. Proof-weighted distillation from Qwen3-30B-A3B → 1.7B and 0.6B on H100. Three teacher variants (Instruct, Thinking, Coder), nine models, 2,788 combined downloads. The rest of the portfolio proves structure beats scale on CPU. This collection shows what happens when you give the methodology real hardware.
|
||||||
|
|
||||||
|
Top model: [Qwen3-1.7B-Coder-Distilled-SFT](https://huggingface.co/reaperdoesntknow/Qwen3-1.7B-Coder-Distilled-SFT) — 508 downloads
|
||||||
|
|
||||||
|
Full methodology: [Structure Over Scale (DOI: 10.57967/hf/8165)](https://doi.org/10.57967/hf/8165)
|
||||||
|
|
||||||
|
*Convergent Intelligence LLC: Research Division*
|
||||||
|
|
||||||
|
<!-- CIX-CROSSLINK-END -->
|
||||||
|
<!-- cix-keeper-ts:2026-05-04T15:25:51Z -->
|
||||||
4
chat_template.jinja
Normal file
4
chat_template.jinja
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
{% for message in messages %}{{'<|im_start|>' + message['role'] + '
|
||||||
|
' + message['content'] + '<|im_end|>' + '
|
||||||
|
'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant
|
||||||
|
' }}{% endif %}
|
||||||
32
config.json
Normal file
32
config.json
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"LlamaForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"dtype": "float32",
|
||||||
|
"eos_token_id": 2,
|
||||||
|
"head_dim": 64,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 960,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 2560,
|
||||||
|
"is_llama_config": true,
|
||||||
|
"max_position_embeddings": 8192,
|
||||||
|
"mlp_bias": false,
|
||||||
|
"model_type": "llama",
|
||||||
|
"num_attention_heads": 15,
|
||||||
|
"num_hidden_layers": 32,
|
||||||
|
"num_key_value_heads": 5,
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"pretraining_tp": 1,
|
||||||
|
"rms_norm_eps": 1e-05,
|
||||||
|
"rope_interleaved": false,
|
||||||
|
"rope_scaling": null,
|
||||||
|
"rope_theta": 100000,
|
||||||
|
"tie_word_embeddings": true,
|
||||||
|
"transformers_version": "4.56.0",
|
||||||
|
"use_cache": true,
|
||||||
|
"vocab_size": 49152
|
||||||
|
}
|
||||||
9
generation_config.json
Normal file
9
generation_config.json
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
{
|
||||||
|
"_from_model_config": true,
|
||||||
|
"bos_token_id": 1,
|
||||||
|
"eos_token_id": [
|
||||||
|
2
|
||||||
|
],
|
||||||
|
"pad_token_id": 2,
|
||||||
|
"transformers_version": "4.56.0"
|
||||||
|
}
|
||||||
48901
merges.txt
Normal file
48901
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:36bb97f1fe2620e5e4f9c0dfbdd7438e3b0ef804689df08c56ea127d4a61bdbe
|
||||||
|
size 1447317080
|
||||||
34
special_tokens_map.json
Normal file
34
special_tokens_map.json
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"additional_special_tokens": [
|
||||||
|
"<|im_start|>",
|
||||||
|
"<|im_end|>"
|
||||||
|
],
|
||||||
|
"bos_token": {
|
||||||
|
"content": "<|im_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"unk_token": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
}
|
||||||
|
}
|
||||||
244963
tokenizer.json
Normal file
244963
tokenizer.json
Normal file
File diff suppressed because it is too large
Load Diff
161
tokenizer_config.json
Normal file
161
tokenizer_config.json
Normal file
@@ -0,0 +1,161 @@
|
|||||||
|
{
|
||||||
|
"add_prefix_space": false,
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"0": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"1": {
|
||||||
|
"content": "<|im_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"2": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"3": {
|
||||||
|
"content": "<repo_name>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"4": {
|
||||||
|
"content": "<reponame>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"5": {
|
||||||
|
"content": "<file_sep>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"6": {
|
||||||
|
"content": "<filename>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"7": {
|
||||||
|
"content": "<gh_stars>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"8": {
|
||||||
|
"content": "<issue_start>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"9": {
|
||||||
|
"content": "<issue_comment>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"10": {
|
||||||
|
"content": "<issue_closed>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"11": {
|
||||||
|
"content": "<jupyter_start>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"12": {
|
||||||
|
"content": "<jupyter_text>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"13": {
|
||||||
|
"content": "<jupyter_code>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"14": {
|
||||||
|
"content": "<jupyter_output>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"15": {
|
||||||
|
"content": "<jupyter_script>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"16": {
|
||||||
|
"content": "<empty_output>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additional_special_tokens": [
|
||||||
|
"<|im_start|>",
|
||||||
|
"<|im_end|>"
|
||||||
|
],
|
||||||
|
"bos_token": "<|im_start|>",
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "<|im_end|>",
|
||||||
|
"extra_special_tokens": {},
|
||||||
|
"max_length": 512,
|
||||||
|
"model_max_length": 8192,
|
||||||
|
"pad_to_multiple_of": null,
|
||||||
|
"pad_token": "<|im_end|>",
|
||||||
|
"pad_token_type_id": 0,
|
||||||
|
"padding_side": "right",
|
||||||
|
"stride": 0,
|
||||||
|
"tokenizer_class": "GPT2Tokenizer",
|
||||||
|
"truncation_side": "right",
|
||||||
|
"truncation_strategy": "longest_first",
|
||||||
|
"unk_token": "<|endoftext|>",
|
||||||
|
"vocab_size": 49152
|
||||||
|
}
|
||||||
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user