初始化项目,由ModelHub XC社区提供模型
Model: Xiaojian9992024/Qwen2.5-THREADRIPPER-Small Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
262
README.md
Normal file
262
README.md
Normal file
@@ -0,0 +1,262 @@
|
||||
---
|
||||
language:
|
||||
- zho
|
||||
- eng
|
||||
- fra
|
||||
- spa
|
||||
- por
|
||||
- deu
|
||||
- ita
|
||||
- rus
|
||||
- jpn
|
||||
- kor
|
||||
- vie
|
||||
- tha
|
||||
- ara
|
||||
tags:
|
||||
- mergekit
|
||||
- merge
|
||||
- funny
|
||||
- conversational
|
||||
- text-generation
|
||||
- chihuahua-powerful
|
||||
- boolean-expression-champion
|
||||
- math-avoider
|
||||
- object-counting-struggler
|
||||
base_model:
|
||||
- Qwen/Qwen2.5-7B-Instruct
|
||||
- fblgit/cybertron-v4-qw7B-MGS
|
||||
- huihui-ai/Qwen2.5-7B-Instruct-abliterated-v3
|
||||
- FreedomIntelligence/HuatuoGPT-o1-7B
|
||||
- rombodawg/Rombos-LLM-V2.5-Qwen-7b
|
||||
model-index:
|
||||
- name: Qwen2.5-THREADRIPPER-Small
|
||||
results:
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: IFEval (0-Shot)
|
||||
type: HuggingFaceH4/ifeval
|
||||
args:
|
||||
num_few_shot: 0
|
||||
metrics:
|
||||
- type: inst_level_strict_acc and prompt_level_strict_acc
|
||||
value: 76.89
|
||||
name: strict accuracy
|
||||
source:
|
||||
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Xiaojian9992024/Qwen2.5-THREADRIPPER-Small
|
||||
name: Open LLM Leaderboard
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: BBH (3-Shot)
|
||||
type: BBH
|
||||
args:
|
||||
num_few_shot: 3
|
||||
metrics:
|
||||
- type: acc_norm
|
||||
value: 35.79
|
||||
name: normalized accuracy
|
||||
source:
|
||||
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Xiaojian9992024/Qwen2.5-THREADRIPPER-Small
|
||||
name: Open LLM Leaderboard
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: MATH Lvl 5 (4-Shot)
|
||||
type: hendrycks/competition_math
|
||||
args:
|
||||
num_few_shot: 4
|
||||
metrics:
|
||||
- type: exact_match
|
||||
value: 47.36
|
||||
name: exact match
|
||||
source:
|
||||
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Xiaojian9992024/Qwen2.5-THREADRIPPER-Small
|
||||
name: Open LLM Leaderboard
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: GPQA (0-shot)
|
||||
type: Idavidrein/gpqa
|
||||
args:
|
||||
num_few_shot: 0
|
||||
metrics:
|
||||
- type: acc_norm
|
||||
value: 8.05
|
||||
name: acc_norm
|
||||
source:
|
||||
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Xiaojian9992024/Qwen2.5-THREADRIPPER-Small
|
||||
name: Open LLM Leaderboard
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: MuSR (0-shot)
|
||||
type: TAUR-Lab/MuSR
|
||||
args:
|
||||
num_few_shot: 0
|
||||
metrics:
|
||||
- type: acc_norm
|
||||
value: 13.93
|
||||
name: acc_norm
|
||||
source:
|
||||
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Xiaojian9992024/Qwen2.5-THREADRIPPER-Small
|
||||
name: Open LLM Leaderboard
|
||||
- task:
|
||||
type: text-generation
|
||||
name: Text Generation
|
||||
dataset:
|
||||
name: MMLU-PRO (5-shot)
|
||||
type: TIGER-Lab/MMLU-Pro
|
||||
config: main
|
||||
split: test
|
||||
args:
|
||||
num_few_shot: 5
|
||||
metrics:
|
||||
- type: acc
|
||||
value: 37.3
|
||||
name: accuracy
|
||||
source:
|
||||
url: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard?query=Xiaojian9992024/Qwen2.5-THREADRIPPER-Small
|
||||
name: Open LLM Leaderboard
|
||||
---
|
||||
base_model:
|
||||
- fblgit/cybertron-v4-qw7B-MGS
|
||||
- huihui-ai/Qwen2.5-7B-Instruct-abliterated-v3
|
||||
- FreedomIntelligence/HuatuoGPT-o1-7B
|
||||
- rombodawg/Rombos-LLM-V2.5-Qwen-7b
|
||||
- Qwen/Qwen2.5-7B-Instruct
|
||||
library_name: transformers
|
||||
tags:
|
||||
- mergekit
|
||||
- merge
|
||||
- funny
|
||||
- conversational
|
||||
- text-generation
|
||||
- chihuahua-powerful
|
||||
- boolean-expression-champion
|
||||
- math-avoider
|
||||
- object-counting-struggler
|
||||
|
||||
---
|
||||
# Xiaojian9992024/Qwen2.5-THREADRIPPER-Small - The "Small" is Just for Show (and Benchmarks)
|
||||
|
||||
## Model Description:
|
||||
|
||||
Behold, the Qwen2.5-THREADRIPPER-Small! Don't let the "Small" in the name fool you; this model is **compactly powerful**... in the same way a chihuahua is "compactly powerful." We merged a bunch of models together using some fancy algorithm called "Linear DELLA" because, frankly, we thought it sounded cool. Did it work? Well...
|
||||
|
||||
Think of this model as the Frankenstein's monster of language models, but instead of being scary, it's just... kinda there. It's built upon the mighty Qwen2.5-7B-Instruct, and then we threw in a cybernetic one, an "abliterated" one (we're not sure what that means either), one that's good at medical stuff (maybe it can diagnose your code?), and another one named "Rombos" because why not?
|
||||
|
||||
## Merge Details
|
||||
### Merge Method
|
||||
|
||||
This model was merged using the [Linear DELLA](https://arxiv.org/abs/2406.11617) merge method, with [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) as the base, because "Linear DELLA" sounded like it knew what it was doing. (Spoiler: the jury's still out.)
|
||||
|
||||
### Models Merged
|
||||
|
||||
We lovingly stitched together the following models to create this... unique entity:
|
||||
* [fblgit/cybertron-v4-qw7B-MGS](https://huggingface.co/fblgit/cybertron-v4-qw7B-MGS) - For that cybernetic edge (we hope).
|
||||
* [huihui-ai/Qwen2.5-7B-Instruct-abliterated-v3](https://huggingface.co/huihui-ai/Qwen2.5-7B-Instruct-abliterated-v3) - "Abliterated" - sounds intense, right? We're banking on it.
|
||||
* [FreedomIntelligence/HuatuoGPT-o1-7B](https://huggingface.co/FreedomIntelligence/HuatuoGPT-o1-7B) - Maybe it can give your code a check-up? (Probably not).
|
||||
* [rombodawg/Rombos-LLM-V2.5-Qwen-7b](https://huggingface.co/rombodawg/Rombos-LLM-V2.5-Qwen-7b) - Because every good merge needs a "Rombos".
|
||||
* [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) - Our solid, dependable base. Relatively speaking.
|
||||
|
||||
### Benchmarks - Prepare for a Wild Ride (Mostly Downhill):
|
||||
|
||||
Okay, let's talk numbers. We ran this bad boy on the Open LLM Leaderboard, and the results are... well, they're numbers! Don't stare directly at them for too long.
|
||||
|
||||
* **MMLU "Pro": Accuracy: 43.5%**. Yes, you read that right. It's about as accurate as guessing, but hey, at least it's consistently around 43%! We're aiming for consistency here, folks. Participation trophies for everyone!
|
||||
* **BBH - Boolean Expressions: Accuracy (Normalized): 83.6%**. BOOM! Boolean expressions? Nailed it! Ask it if "true and false or true" is true, and it'll get it right most of the time. Boolean logic? Bring it on! Existential questions? Maybe not.
|
||||
* **BBH - Object Counting: Accuracy (Normalized): 33.6%**. Counting objects? Apparently, this is where our Threadripper trips over its own feet. Maybe it needs glasses? Or perhaps objects are just inherently confusing, even for advanced AI. We blame the objects.
|
||||
* **BBH - Tracking Shuffled Objects (7 objects): Accuracy (Normalized): 14.4%**. Seven objects? Forget about it. Three objects? Still bad (22.4%). Five objects? Slightly less terrible (22%). If you need something tracked, maybe use GPS, not this model. Unless you're tracking Boolean values. Then we're golden.
|
||||
* **GPQA: Accuracy (Normalized): ~30%**. GPQA? More like GP-"Q"-Maybe-"A". It's trying its best, okay? Lower your expectations.
|
||||
* **Math Hard (Algebra, Counting, Geometry, etc.): Exact Match: 0.0%**. Zero. Zilch. Nada. If you need help with your math homework, please, for the love of numbers, use a calculator. Or ask a human. Or a very sophisticated pigeon. Anything but this for math. Seriously, *anything*.
|
||||
|
||||
### Intended Use:
|
||||
|
||||
* **Conversational?** Sure, if you like conversations that are 43.5% accurate on general knowledge and amazing at Boolean expressions but can't count or track objects. It's definitely *a* conversation starter... about the limitations of language models.
|
||||
* **Text Generation?** Absolutely! It generates text. Whether that text is coherent, accurate, or helpful is another question entirely. But it *does* generate text, and sometimes that's the best you can ask for. Think of it as performance art.
|
||||
* **Funny Model Cards?** Clearly, yes. It excels at providing benchmark data that is hilarious when you try to spin it positively. We're leaning into our strengths here.
|
||||
|
||||
### Limitations:
|
||||
|
||||
* Math. Just... math. Avoid math. Run away from math. If you even *think* about math, this model will give you a blank stare and possibly start reciting Boolean expressions for comfort.
|
||||
* Object counting and tracking. Objects are its nemesis. Especially when shuffled. Or when there are more than two. Actually, just avoid objects in general. Stick to abstract concepts.
|
||||
* GPQA. We're still not sure what GPQA is, and neither is the model, apparently. It's a mystery for the ages.
|
||||
* May occasionally hallucinate benchmark scores that are *slightly* better than reality (we're working on our honesty module... or maybe not).
|
||||
|
||||
### How to Use:
|
||||
|
||||
Use responsibly? Or irresponsibly, we're not your boss. Just don't expect it to balance your checkbook or track your keys. For Boolean expressions though? It's your champion. Need to know if "cat is animal AND animal has fur"? This model's got you.
|
||||
|
||||
### Disclaimer:
|
||||
|
||||
Side effects of using this model may include: existential dread, questioning the nature of intelligence, and a sudden urge to count shuffled objects yourself to prove you're better than a language model. Use at your own risk. But hey, at least it's small! And sometimes, small and funny is all you need.
|
||||
|
||||
### Configuration
|
||||
|
||||
The following YAML configuration was used to produce this model (because you asked, not because we understand it):
|
||||
|
||||
```yaml
|
||||
merge_method: della_linear
|
||||
base_model: Qwen/Qwen2.5-7B-Instruct
|
||||
dtype: bfloat16
|
||||
parameters:
|
||||
epsilon: 0.015 # Fine-grain scaling for precision. (Sounds important!)
|
||||
lambda: 1.6 # Strong emphasis on top-performing models. (We aimed high!)
|
||||
normalize: true # Stable parameter integration across models. (Stability is key, even if accuracy isn't)
|
||||
adaptive_merge_parameters:
|
||||
task_weights:
|
||||
tinyArc: 1.75 # Logical reasoning. (For when you need *some* logic)
|
||||
tinyHellaswag: 1.65 # Contextual predictions. (It tries, bless its heart)
|
||||
tinyMMLU: 1.8 # Domain knowledge. (Limited domains, mostly Boolean expressions)
|
||||
tinyTruthfulQA: 2.0 # Prioritize truthful reasoning. (Truthful-ish. Mostly.)
|
||||
tinyTruthfulQA_mc1: 1.85 # Even more truthful-ishness!
|
||||
tinyWinogrande: 1.9 # Advanced reasoning and predictions. (Baby steps in advanced reasoning)
|
||||
IFEval: 2.1 # Instruction-following and multitasking. (Instructions followed loosely. Multitasking? Define "task".)
|
||||
BBH: 2.25 # Complex reasoning. (Boolean expressions are complex, right?)
|
||||
MATH: 2.4 # Mathematical reasoning. (Just kidding. See benchmarks.)
|
||||
GPQA: 2.35 # Factual QA. (Facts are... subjective?)
|
||||
MUSR: 2.3 # Multi-step reasoning. (One step at a time, maybe?)
|
||||
MMLU-PRO: 2.35 # Domain multitask performance. (Boolean expressions. We keep coming back to Boolean expressions.)
|
||||
smoothing_factor: 0.05 # TURN UP THE SMOOTH! (Smoothness is next to godliness, or at least accuracy)
|
||||
models:
|
||||
- model: Qwen/Qwen2.5-7B-Instruct
|
||||
parameters:
|
||||
weight: 0.65 # The heavy lifter (relatively speaking)
|
||||
density: 0.65 # Dense with... something.
|
||||
- model: huihui-ai/Qwen2.5-7B-Instruct-abliterated-v3
|
||||
parameters:
|
||||
weight: 0.1 # A touch of "abliteration" for flavor
|
||||
density: 0.1 # Just a sprinkle
|
||||
- model: rombodawg/Rombos-LLM-V2.5-Qwen-7b
|
||||
parameters:
|
||||
weight: 0.15 # Rombos-ness, now in model form!
|
||||
density: 0.15 # A dash more density
|
||||
- model: fblgit/cybertron-v4-qw7B-MGS
|
||||
parameters:
|
||||
weight: 0.05 # Cybertron! Pew pew! (Performance may vary)
|
||||
density: 0.05 # A smidgen of cybernetics
|
||||
- model: FreedomIntelligence/HuatuoGPT-o1-7B
|
||||
parameters:
|
||||
weight: 0.05 # Medical intelligence? Maybe?
|
||||
density: 0.05 # Homeopathic dose of medical knowledge
|
||||
```
|
||||
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
|
||||
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/Xiaojian9992024__Qwen2.5-THREADRIPPER-Small-details)
|
||||
|
||||
| Metric |Value|
|
||||
|-------------------|----:|
|
||||
|Avg. |36.55|
|
||||
|IFEval (0-Shot) |76.89|
|
||||
|BBH (3-Shot) |35.79|
|
||||
|MATH Lvl 5 (4-Shot)|47.36|
|
||||
|GPQA (0-shot) | 8.05|
|
||||
|MuSR (0-shot) |13.93|
|
||||
|MMLU-PRO (5-shot) |37.30|
|
||||
|
||||
24
added_tokens.json
Normal file
24
added_tokens.json
Normal file
@@ -0,0 +1,24 @@
|
||||
{
|
||||
"</tool_call>": 151658,
|
||||
"<tool_call>": 151657,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
29
config.json
Normal file
29
config.json
Normal file
@@ -0,0 +1,29 @@
|
||||
{
|
||||
"_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
|
||||
"architectures": [
|
||||
"Qwen2ForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 151643,
|
||||
"eos_token_id": 151645,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 3584,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 18944,
|
||||
"max_position_embeddings": 32768,
|
||||
"max_window_layers": 28,
|
||||
"model_type": "qwen2",
|
||||
"num_attention_heads": 28,
|
||||
"num_hidden_layers": 28,
|
||||
"num_key_value_heads": 4,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000.0,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": false,
|
||||
"torch_dtype": "bfloat16",
|
||||
"transformers_version": "4.48.2",
|
||||
"use_cache": true,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 152064
|
||||
}
|
||||
43
mergekit_config.yml
Normal file
43
mergekit_config.yml
Normal file
@@ -0,0 +1,43 @@
|
||||
merge_method: della_linear
|
||||
base_model: Qwen/Qwen2.5-7B-Instruct
|
||||
dtype: bfloat16
|
||||
parameters:
|
||||
epsilon: 0.015 # Fine-grain scaling for precision.
|
||||
lambda: 1.6 # Strong emphasis on top-performing models.
|
||||
normalize: true # Stable parameter integration across models.
|
||||
adaptive_merge_parameters:
|
||||
task_weights:
|
||||
tinyArc: 1.75 # Logical reasoning.
|
||||
tinyHellaswag: 1.65 # Contextual predictions.
|
||||
tinyMMLU: 1.8 # Domain knowledge.
|
||||
tinyTruthfulQA: 2.0 # Prioritize truthful reasoning.
|
||||
tinyTruthfulQA_mc1: 1.85
|
||||
tinyWinogrande: 1.9 # Advanced reasoning and predictions.
|
||||
IFEval: 2.1 # Instruction-following and multitasking.
|
||||
BBH: 2.25 # Complex reasoning.
|
||||
MATH: 2.4 # Mathematical reasoning.
|
||||
GPQA: 2.35 # Factual QA.
|
||||
MUSR: 2.3 # Multi-step reasoning.
|
||||
MMLU-PRO: 2.35 # Domain multitask performance.
|
||||
smoothing_factor: 0.05 # TURN UP THE SMOOTH!
|
||||
models:
|
||||
- model: Qwen/Qwen2.5-7B-Instruct
|
||||
parameters:
|
||||
weight: 0.65
|
||||
density: 0.65
|
||||
- model: huihui-ai/Qwen2.5-7B-Instruct-abliterated-v3
|
||||
parameters:
|
||||
weight: 0.1
|
||||
density: 0.1
|
||||
- model: rombodawg/Rombos-LLM-V2.5-Qwen-7b
|
||||
parameters:
|
||||
weight: 0.15
|
||||
density: 0.15
|
||||
- model: fblgit/cybertron-v4-qw7B-MGS
|
||||
parameters:
|
||||
weight: 0.05
|
||||
density: 0.05
|
||||
- model: FreedomIntelligence/HuatuoGPT-o1-7B
|
||||
parameters:
|
||||
weight: 0.05
|
||||
density: 0.05
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00004.safetensors
Normal file
3
model-00001-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d915c9b47f6ded6680cb3f24449f20b3ac61f0e25b9826efe2dda8e650f3fa7a
|
||||
size 4976698776
|
||||
3
model-00002-of-00004.safetensors
Normal file
3
model-00002-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5ef96ffaa8928cf3d12405020db38aac268061b73acdfe829ee02daa380b6289
|
||||
size 4932751032
|
||||
3
model-00003-of-00004.safetensors
Normal file
3
model-00003-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:63b5f3f76d85c78270e27c9866d3a2c5b9648f526a1ac496c0e39751e75a9b6e
|
||||
size 4991495808
|
||||
3
model-00004-of-00004.safetensors
Normal file
3
model-00004-of-00004.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:15fb5cdf7740cbd6240661cf905498c2a5573e809214ff05665e8edee33484b0
|
||||
size 330326240
|
||||
1
model.safetensors.index.json
Normal file
1
model.safetensors.index.json
Normal file
File diff suppressed because one or more lines are too long
31
special_tokens_map.json
Normal file
31
special_tokens_map.json
Normal file
@@ -0,0 +1,31 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
|
||||
size 11421896
|
||||
208
tokenizer_config.json
Normal file
208
tokenizer_config.json
Normal file
@@ -0,0 +1,208 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nYou are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"model_max_length": 131072,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user