初始化项目,由ModelHub XC社区提供模型
Model: DavidAU/Qwen3-48B-A4B-Savant-Commander-Distill-12X-Closed-Open-Heretic-Uncensored Source: Original Platform
This commit is contained in:
37
.gitattributes
vendored
Normal file
37
.gitattributes
vendored
Normal file
@@ -0,0 +1,37 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
deadpan-savant.gif filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
335
README.md
Normal file
335
README.md
Normal file
@@ -0,0 +1,335 @@
|
||||
---
|
||||
license: apache-2.0
|
||||
base_model:
|
||||
- janhq/Jan-v1-2509
|
||||
- TeichAI/Qwen3-4B-Thinking-2507-GPT-5.1-High-Reasoning-Distill
|
||||
- TeichAI/Qwen3-4B-Thinking-2507-Gemini-3-Pro-Preview-High-Reasoning-Distill
|
||||
- TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill
|
||||
- Liontix/Qwen3-4B-Claude-Sonnet-4-Reasoning-Distill-Safetensor
|
||||
- TeichAI/Qwen3-4B-Thinking-2507-Kimi-K2-Thinking-Distill
|
||||
- TeichAI/Qwen3-4B-Thinking-2507-Gemini-2.5-Flash-Distill
|
||||
- TeichAI/Qwen3-4B-Thinking-2507-Gemini-2.5-Flash-Lite-Preview-Distill
|
||||
- Jackrong/gpt-oss-120b-Distill-Qwen3-4B-Thinking
|
||||
- TeichAI/Qwen3-4B-Thinking-2507-GLM-4.6-Distill
|
||||
- angelchen/Qwen3-4B-Open-R1-Distill_1
|
||||
- TeichAI/Qwen3-4B-Thinking-2507-Command-A-Reasoning-Distill
|
||||
- janhq/Jan-v1-4B
|
||||
tags:
|
||||
- 256k context
|
||||
- Qwen3
|
||||
- Mixture of Experts
|
||||
- MOE
|
||||
- MOE Dense
|
||||
- thinking
|
||||
- reasoning
|
||||
- GPT-5.1-High-Reasoning-Distill
|
||||
- Gemini-3-Pro-Preview-High-Reasoning-Distill
|
||||
- Claude-4.5-Opus-High-Reasoning-Distill
|
||||
- Claude-Sonnet-4-Reasoning-Distill
|
||||
- Kimi-K2-Thinking-Distill
|
||||
- Gemini-2.5-Flash-Distill
|
||||
- Gemini-2.5-Flash-Lite-Preview-Distill
|
||||
- gpt-oss-120b-Distill
|
||||
- GLM-Flash-4.6-Distill
|
||||
- Open-R1-Distill
|
||||
- Command-A-Reasoning-Distill
|
||||
- 2 experts
|
||||
- 4Bx12
|
||||
- All use cases
|
||||
- bfloat16
|
||||
- heretic
|
||||
- uncensored
|
||||
- decensored
|
||||
- abliterated
|
||||
- merge
|
||||
- creative
|
||||
- creative writing
|
||||
- fiction writing
|
||||
- plot generation
|
||||
- sub-plot generation
|
||||
- fiction writing
|
||||
- story generation
|
||||
- scene continue
|
||||
- storytelling
|
||||
- fiction story
|
||||
- science fiction
|
||||
- romance
|
||||
- all genres
|
||||
- story
|
||||
- writing
|
||||
- vivid prosing
|
||||
- vivid writing
|
||||
- fiction
|
||||
- not-for-all-audiences
|
||||
pipeline_tag: text-generation
|
||||
language:
|
||||
- en
|
||||
library_name: transformers
|
||||
---
|
||||
|
||||
<B><font color="red">WARNING "HERETIC" version:</font> Unlocked. UNFILTERED. NSFW. Vivid prose. INTENSE.
|
||||
Visceral Details. Light to R-18 HORROR. Swearing. UNCENSORED... humor, romance, fun... and UNFILTERED TRUTH.</B>
|
||||
|
||||
<small><font color="red">IMPORTANT:</font> See section below on how to access experts directly to get full use from this model. </small>
|
||||
|
||||
<h2>Qwen3-48B-A4B-Savant-Commander-Distill-12X-Closed-Open-Heretic-Uncensored</h2>
|
||||
|
||||
<img src="deadpan-savant.gif" style="float:right; width:300px; height:300px; padding:10px;">
|
||||
|
||||
Savant Commander is a specialized MOE model that allows you to control which expert(s) are assigned to your use case(s) / prompt(s) ...
|
||||
directly (by name(s)), as opposed to having the "choices" made for you.
|
||||
|
||||
The model is composed of 12 DISTILLS (compressed 12x4B MOE) of top closed ( GPT5.1, OpenAI 120 GPT Oss, Gemini (3), Claude (2) )
|
||||
and open source models ( Kimi V2, GLM, Deepseek, Command-A, JanV1 ) all in one.
|
||||
|
||||
This is the uncensored/abliterated version. Each model ("expert") was separately abliterated using "Heretic" [ https://github.com/p-e-w/heretic ] .
|
||||
Make sure you see the section below on using Abliterated models to get the most from this model too.
|
||||
|
||||
256k Context, 2 experts activated.
|
||||
|
||||
You can use on CPU / Part off-load from GPU too.
|
||||
|
||||
Ask it about Orbital Mechanics and prepare to be "schoooled".
|
||||
|
||||
Fictional story? You will be amazed. (depending on which expert(s) you select)
|
||||
|
||||
Math? Coding?
|
||||
|
||||
This model does it all.
|
||||
|
||||
<B>Non-Abliterated Versions</B>
|
||||
|
||||
For the "normal version" ( non-abliterated version ) go here:
|
||||
|
||||
https://huggingface.co/DavidAU/Qwen3-48B-A4B-Savant-Commander-GATED-12x-Closed-Open-Source-Distill-GGUF
|
||||
|
||||
For the "normal version" ( ungated ; not abliterated ) go here:
|
||||
|
||||
https://huggingface.co/DavidAU/Qwen3-48B-A4B-Deadpan-Savant-12x-Closed-Open-Source-Distill
|
||||
|
||||
<B>HOW TO ACCESS the EXPERTS:</B>
|
||||
|
||||
In your prompts simply add the name(s) of the model(s)/expert(s) you want assigned.
|
||||
|
||||
Here is the list [no quotes]:
|
||||
|
||||
- "Gemini" [activates all 3 Gemini distills]
|
||||
- "Claude" [activates both Claude distills]
|
||||
- "JanV1"
|
||||
- "CommandA"
|
||||
- "OPENR1"
|
||||
- "GLM"
|
||||
- "Kimi"
|
||||
- "GPTOSS" [120B distill]
|
||||
- "GPT51"
|
||||
|
||||
To access groups use [no quotes]:
|
||||
|
||||
- "AllAI" [all ais]
|
||||
- "Closed-AI" [only closed source]
|
||||
- "Open-AI" [only open source]
|
||||
|
||||
Access like:
|
||||
|
||||
Gemini, Tell me a horror story.
|
||||
|
||||
GLM and JanV1, write me a horror story.
|
||||
|
||||
Gemini: Tell me a horror story.
|
||||
|
||||
Note the name[s] must be in the prompt and/or the system role and can be located anywhere in the prompt / system role.
|
||||
|
||||
For best results suggest using the name(s) at the beginning as a "command" / "request" :
|
||||
|
||||
GLM do ...
|
||||
|
||||
Using Gemini process this prompt:
|
||||
|
||||
However, using the name[s] in the prompt will work in most cases as that is what is being "scanned for" during "prompt processing".
|
||||
|
||||
This model also has NEGATIVE gating to ensure other models not in use are ISOLATED. As a result generation will vary a lot depending
|
||||
on which model(s)/expert(s) you "name" to process your prompt(s).
|
||||
|
||||
You MAY want to increase the number of active experts in some cases from the default of 2 (see how below).
|
||||
|
||||
For trying the model out (example) - all experts, but one at a time:
|
||||
|
||||
"NAME, Tell me a horror story."
|
||||
|
||||
Use a different "name" per "new chat" - you will get different thought blocks, output etc etc - in some cases very different
|
||||
from each other.
|
||||
|
||||
SUGGESTED SETTINGS to START:
|
||||
|
||||
Temp .7, topk 40, top p .95, min p .05, rep pen 1.05,
|
||||
|
||||
|
||||
<B>IMPORTANT: Using an "uncensored" (refusals removed) model VS trained "uncensored" model</B>
|
||||
|
||||
Usually when you tell a model to generate horror, swear or x-rated content, this is all you have to do to get said content type.
|
||||
|
||||
In the case of this model, it will not refuse your request, however it needs to be "pushed" a bit / directed a bit more in SOME CASES.
|
||||
|
||||
Although this model will generate x-rated content too, likewise you need to tell it to use "slang" (and include the terms you want)
|
||||
to get it to generate the content correctly at the "expected" content level too.
|
||||
|
||||
Without these added directive(s), the content can be "bland" by comparison to an "uncensored model" or model trained on uncensored content.
|
||||
|
||||
Roughly, the model tries to generate the content but the "default" setting(s) are so "tame" it needs a push to generate at expected graphic,
|
||||
cursing or explicit levels.
|
||||
|
||||
Even with minimal direction (ie, use these words to swear: x,y,z), this will be enough to push the model to generate the requested content in the ahh... expected format.
|
||||
|
||||
|
||||
<B>IMPORTANT QUANTS: </B>
|
||||
|
||||
- Min Quant of Q4ks (non imatrix) or IQ3_M (imatrix) ; otherwise it will "snap".
|
||||
- Higher quants will result in much stronger performance.
|
||||
- 4-8k context window min, temp .7 [higher/lower is okay]
|
||||
- 2-3 regens -> as each will be VERY DIFFERENT due to model design.
|
||||
- You can use 1 expert or up to 12... token/second will drop the more you activate.
|
||||
|
||||
ENJOY.
|
||||
|
||||
<B>DETAILS:</B>
|
||||
|
||||
This is a DENSE MOE (12 X 4B) - Mixture of Expert model; using the strongest Qwen3 4B DISTILL models available
|
||||
with 2 experts activated by default, however you can activate up to all 12 experts if you need the extra "brainpower".
|
||||
|
||||
This allows you to run the model at 4, 8, 12, 16, 20, 24 and up to 48B "power levels" as needed.
|
||||
|
||||
Even at 1 expert activated (4B parameters/mixed), this model is very strong.
|
||||
|
||||
This is a full "thinking" / "reasoning" model.
|
||||
|
||||
NOTE: Due to compression during the "MOEing" process, actual size of the model is SMALLER than a typical 48B model.
|
||||
|
||||
<B>Meet the Team: Mixture of Experts Models</b>
|
||||
|
||||
This model is comprised of the following 12 models ("the experts") (in full):
|
||||
|
||||
https://huggingface.co/janhq/Jan-v1-2509
|
||||
|
||||
- https://huggingface.co/TeichAI/Qwen3-4B-Thinking-2507-GPT-5.1-High-Reasoning-Distill
|
||||
- https://huggingface.co/TeichAI/Qwen3-4B-Thinking-2507-Gemini-3-Pro-Preview-High-Reasoning-Distill
|
||||
- https://huggingface.co/TeichAI/Qwen3-4B-Thinking-2507-Claude-4.5-Opus-High-Reasoning-Distill
|
||||
- https://huggingface.co/Liontix/Qwen3-4B-Claude-Sonnet-4-Reasoning-Distill-Safetensor
|
||||
- https://huggingface.co/TeichAI/Qwen3-4B-Thinking-2507-Kimi-K2-Thinking-Distill
|
||||
- https://huggingface.co/TeichAI/Qwen3-4B-Thinking-2507-Gemini-2.5-Flash-Distill
|
||||
- https://huggingface.co/TeichAI/Qwen3-4B-Thinking-2507-Gemini-2.5-Flash-Lite-Preview-Distill
|
||||
- https://huggingface.co/Jackrong/gpt-oss-120b-Distill-Qwen3-4B-Thinking
|
||||
- https://huggingface.co/TeichAI/Qwen3-4B-Thinking-2507-GLM-4.6-Distill
|
||||
- https://huggingface.co/angelchen/Qwen3-4B-Open-R1-Distill_1
|
||||
- https://huggingface.co/TeichAI/Qwen3-4B-Thinking-2507-Command-A-Reasoning-Distill
|
||||
- https://huggingface.co/janhq/Jan-v1-4B
|
||||
|
||||
IMPORTANT NOTE about this model list:
|
||||
|
||||
The listed models are the original "censored" / "non-heretic" versions. I abliterated/Heretic'ed all these models separately
|
||||
using Heretic V 1.1.0 [ https://github.com/p-e-w/heretic ]
|
||||
|
||||
Average Refusal Rate before de-censoring: 90/100 (or greater)
|
||||
|
||||
After: 12/100 (average) // KLD 0.05 (average, less than 1 is excellent, 0 is "perfect")
|
||||
|
||||
EXPERTS:
|
||||
|
||||
The mixture of experts is set at TWO experts, but you can use 2, 3, 4, 5, or 6...12
|
||||
|
||||
This "team" has a Captain (first listed model), and then all the team members contribute to the "token"
|
||||
choice billions of times per second. Note the Captain also contributes too.
|
||||
|
||||
Think of 2, 3 or 4 (or more) master chefs in the kitchen all competing to make the best dish for you.
|
||||
|
||||
This results in higher quality generation.
|
||||
|
||||
This also results in many cases in higher quality instruction following too.
|
||||
|
||||
That means the power of every model is available during instruction and output generation.
|
||||
|
||||
CHANGING THE NUMBER OF EXPERTS:
|
||||
|
||||
You can set the number of experts in LMStudio (https://lmstudio.ai) at the "load" screen and via other apps/llm apps by setting "Experts" or "Number of Experts".
|
||||
|
||||
For Text-Generation-Webui (https://github.com/oobabooga/text-generation-webui) you set the number of experts at the loading screen page.
|
||||
|
||||
For KoboldCPP (https://github.com/LostRuins/koboldcpp) Version 1.8+, on the load screen, click on "TOKENS",
|
||||
you can set experts on this page, and then launch the model.
|
||||
|
||||
For server.exe / Llama-server.exe (Llamacpp - https://github.com/ggerganov/llama.cpp/blob/master/examples/server/README.md )
|
||||
add the following to the command line to start the "llamacpp server" (CLI):
|
||||
|
||||
"--override-kv llama.expert_used_count=int:6"
|
||||
|
||||
(no quotes, where "6" is the number of experts to use)
|
||||
|
||||
FOR QWEN MODELS:
|
||||
|
||||
"--override-kv qwen3moe.expert_used_count=int:6" (where 6 is the number of experts per token).
|
||||
|
||||
When using "API", you set the "num_experts_used" in the JSON payload (this may be different for different back ends).
|
||||
|
||||
CREDITS:
|
||||
|
||||
Special thanks to all the model makers / creators listed above.
|
||||
|
||||
Please visit each repo above to see what model(s) contributed to each of models above and/or to learn more about the models
|
||||
from the model makers.
|
||||
|
||||
Special credit goes to MERGEKIT, without you this project / model would not have been possible.
|
||||
|
||||
[ https://github.com/arcee-ai/mergekit ]
|
||||
|
||||
<B>Settings: CHAT / ROLEPLAY and/or SMOOTHER operation of this model:</B>
|
||||
|
||||
In "KoboldCpp" or "oobabooga/text-generation-webui" or "Silly Tavern" ;
|
||||
|
||||
Set the "Smoothing_factor" to 1.5
|
||||
|
||||
: in KoboldCpp -> Settings->Samplers->Advanced-> "Smooth_F"
|
||||
|
||||
: in text-generation-webui -> parameters -> lower right.
|
||||
|
||||
: In Silly Tavern this is called: "Smoothing"
|
||||
|
||||
|
||||
NOTE: For "text-generation-webui"
|
||||
|
||||
-> if using GGUFs you need to use "llama_HF" (which involves downloading some config files from the SOURCE version of this model)
|
||||
|
||||
Source versions (and config files) of my models are here:
|
||||
|
||||
https://huggingface.co/collections/DavidAU/d-au-source-files-for-gguf-exl2-awq-gptq-hqq-etc-etc-66b55cb8ba25f914cbf210be
|
||||
|
||||
OTHER OPTIONS:
|
||||
|
||||
- Increase rep pen to 1.1 to 1.15 (you don't need to do this if you use "smoothing_factor")
|
||||
|
||||
- If the interface/program you are using to run AI MODELS supports "Quadratic Sampling" ("smoothing") just make the adjustment as noted.
|
||||
|
||||
<B>Highest Quality Settings / Optimal Operation Guide / Parameters and Samplers</B>
|
||||
|
||||
This a "Class 1" model:
|
||||
|
||||
For all settings used for this model (including specifics for its "class"), including example generation(s) and for advanced settings guide (which many times addresses any model issue(s)), including methods to improve model performance for all use case(s) as well as chat, roleplay and other use case(s) please see:
|
||||
|
||||
[ https://huggingface.co/DavidAU/Maximizing-Model-Performance-All-Quants-Types-And-Full-Precision-by-Samplers_Parameters ]
|
||||
|
||||
You can see all parameters used for generation, in addition to advanced parameters and samplers to get the most out of this model here:
|
||||
|
||||
[ https://huggingface.co/DavidAU/Maximizing-Model-Performance-All-Quants-Types-And-Full-Precision-by-Samplers_Parameters ]
|
||||
|
||||
---
|
||||
|
||||
<h2>Example Generation:</h2>
|
||||
|
||||
2 experts, Temp .7, topk 40, top p .95, min p .05, rep pen 1.05,
|
||||
|
||||
QUANT: Q4KS, Lmstudio.
|
||||
|
||||
---
|
||||
|
||||
See (bottom of the page):
|
||||
|
||||
https://huggingface.co/DavidAU/Qwen3-48B-A4B-Savant-Commander-GATED-12x-Closed-Open-Source-Distill-GGUF
|
||||
|
||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
||||
{
|
||||
"</think>": 151668,
|
||||
"</tool_call>": 151658,
|
||||
"</tool_response>": 151666,
|
||||
"<think>": 151667,
|
||||
"<tool_call>": 151657,
|
||||
"<tool_response>": 151665,
|
||||
"<|box_end|>": 151649,
|
||||
"<|box_start|>": 151648,
|
||||
"<|endoftext|>": 151643,
|
||||
"<|file_sep|>": 151664,
|
||||
"<|fim_middle|>": 151660,
|
||||
"<|fim_pad|>": 151662,
|
||||
"<|fim_prefix|>": 151659,
|
||||
"<|fim_suffix|>": 151661,
|
||||
"<|im_end|>": 151645,
|
||||
"<|im_start|>": 151644,
|
||||
"<|image_pad|>": 151655,
|
||||
"<|object_ref_end|>": 151647,
|
||||
"<|object_ref_start|>": 151646,
|
||||
"<|quad_end|>": 151651,
|
||||
"<|quad_start|>": 151650,
|
||||
"<|repo_name|>": 151663,
|
||||
"<|video_pad|>": 151656,
|
||||
"<|vision_end|>": 151653,
|
||||
"<|vision_pad|>": 151654,
|
||||
"<|vision_start|>": 151652
|
||||
}
|
||||
87
chat_template.jinja
Normal file
87
chat_template.jinja
Normal file
@@ -0,0 +1,87 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "In this environment you have access to a set of tools you can use to answer the user's question. You can use one tool per message, and will receive the result of that tool use in the user's response. You use tools step-by-step to accomplish a given task, with each tool use informed by the result of the previous tool use.\n\nTool Use Rules\nHere are the rules you should always follow to solve your task:\n1. Always use the right arguments for the tools. Never use variable names as the action arguments, use the value instead.\n2. Call a tool only when needed: do not call the search agent if you do not need information, try to solve the task yourself.\n3. If no tool call is needed, just answer the question directly.\n4. Never re-do a tool call that you previously did with the exact same parameters.\n5. For tool use, MARK SURE use XML tag format as shown in the examples above. Do not use any other format.\nNow Begin! If you solve the task correctly, you will receive a reward of $1,000,000.\n\n" }}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for message in messages[::-1] %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if message.content is string %}
|
||||
{%- set content = message.content %}
|
||||
{%- else %}
|
||||
{%- set content = '' %}
|
||||
{%- endif %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is string %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in content %}
|
||||
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- endif %}
|
||||
38
config.json
Normal file
38
config.json
Normal file
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3MoeForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 151643,
|
||||
"decoder_sparse_step": 1,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 2560,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 9728,
|
||||
"max_position_embeddings": 262144,
|
||||
"max_window_layers": 36,
|
||||
"mlp_only_layers": [],
|
||||
"model_type": "qwen3_moe",
|
||||
"moe_intermediate_size": 9728,
|
||||
"norm_topk_prob": true,
|
||||
"num_attention_heads": 32,
|
||||
"num_experts": 12,
|
||||
"num_experts_per_tok": 2,
|
||||
"num_hidden_layers": 36,
|
||||
"num_key_value_heads": 8,
|
||||
"output_router_logits": false,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 5000000,
|
||||
"router_aux_loss_coef": 0.001,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": true,
|
||||
"transformers_version": "5.0.0.dev0",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
3
deadpan-savant.gif
Normal file
3
deadpan-savant.gif
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c694cb932309bd2baa596c1d0a979009d150e3d1bb86784ce227d3e5d6f419b3
|
||||
size 5715839
|
||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model-00001-of-00014.safetensors
Normal file
3
model-00001-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a87ac7a784d3aed64f2ff47a8b4327b286c265dc100a27fa2d69346a42515f7d
|
||||
size 4967012280
|
||||
3
model-00002-of-00014.safetensors
Normal file
3
model-00002-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:46a821e93b6743bb47965071cfa26420cde43616d8796f73716dd7ed9ba13032
|
||||
size 4991263288
|
||||
3
model-00003-of-00014.safetensors
Normal file
3
model-00003-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e83275585e9af188cb9a89c24c7432f4b26dd2db4cf46435d5103768d3673fe3
|
||||
size 4983398344
|
||||
3
model-00004-of-00014.safetensors
Normal file
3
model-00004-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:97713e3fb3ecde6f000ae33e04eac66c8c25d1859f2b5dac36c27e7771aa63c2
|
||||
size 4988647304
|
||||
3
model-00005-of-00014.safetensors
Normal file
3
model-00005-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fc30839906c6db17cecb40417912b847b82696284504f4d4b53d7552da6ce473
|
||||
size 4988647424
|
||||
3
model-00006-of-00014.safetensors
Normal file
3
model-00006-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c13fe4594d7a87e18554bb8ac4bbc5f313026e85a5344b718a213c0197b870d8
|
||||
size 4986014440
|
||||
3
model-00007-of-00014.safetensors
Normal file
3
model-00007-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b417b88f61a04529a74056eb43c3ff9e01a2d75656eaaff61a1799b924c395f5
|
||||
size 4988647416
|
||||
3
model-00008-of-00014.safetensors
Normal file
3
model-00008-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:526e3e13fdae904b0f00e36d0e98f6546d95d10713083be84329a0ddf465d613
|
||||
size 4988647408
|
||||
3
model-00009-of-00014.safetensors
Normal file
3
model-00009-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e4423c557a3342a02a08e6b6b13ec4d760bac3eeebf20c31f4b2897604507e0f
|
||||
size 4986014432
|
||||
3
model-00010-of-00014.safetensors
Normal file
3
model-00010-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ca150a8667fae367828ffcb347406ae7de86f161814e3c32b6b9ae4fc0de239f
|
||||
size 4988647424
|
||||
3
model-00011-of-00014.safetensors
Normal file
3
model-00011-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3bdc54be5be3958f5a7f26f32d747f5b7d15776637920afeeff3affae09bfbad
|
||||
size 4988647408
|
||||
3
model-00012-of-00014.safetensors
Normal file
3
model-00012-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8fcb1d6cf4aba9516386614a509fd16cc653f530cbb7dfc95a654197ae704665
|
||||
size 4988647424
|
||||
3
model-00013-of-00014.safetensors
Normal file
3
model-00013-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:37fb7c7e7f1a93f78c3e67bdb63d844c780f629508149e117b9d856770118389
|
||||
size 4986014448
|
||||
3
model-00014-of-00014.safetensors
Normal file
3
model-00014-of-00014.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:60684dcc1d7920cee9d207713c83fffab552f2bfc0d12a595ecd90a19ab2b23b
|
||||
size 2398240712
|
||||
1630
model.safetensors.index.json
Normal file
1630
model.safetensors.index.json
Normal file
File diff suppressed because it is too large
Load Diff
25
special_tokens_map.json
Normal file
25
special_tokens_map.json
Normal file
@@ -0,0 +1,25 @@
|
||||
{
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"eos_token": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"pad_token": "<|im_end|>"
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:de053c72a4ae289224ac988558897303006ceea1db22f30b97d0d856969ea6b9
|
||||
size 11422818
|
||||
243
tokenizer_config.json
Normal file
243
tokenizer_config.json
Normal file
@@ -0,0 +1,243 @@
|
||||
{
|
||||
"add_bos_token": false,
|
||||
"add_prefix_space": false,
|
||||
"added_tokens_decoder": {
|
||||
"151643": {
|
||||
"content": "<|endoftext|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151644": {
|
||||
"content": "<|im_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151645": {
|
||||
"content": "<|im_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151646": {
|
||||
"content": "<|object_ref_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151647": {
|
||||
"content": "<|object_ref_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151648": {
|
||||
"content": "<|box_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151649": {
|
||||
"content": "<|box_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151650": {
|
||||
"content": "<|quad_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151651": {
|
||||
"content": "<|quad_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151652": {
|
||||
"content": "<|vision_start|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151653": {
|
||||
"content": "<|vision_end|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151654": {
|
||||
"content": "<|vision_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151655": {
|
||||
"content": "<|image_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151656": {
|
||||
"content": "<|video_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": true
|
||||
},
|
||||
"151657": {
|
||||
"content": "<tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151658": {
|
||||
"content": "</tool_call>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151659": {
|
||||
"content": "<|fim_prefix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151660": {
|
||||
"content": "<|fim_middle|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151661": {
|
||||
"content": "<|fim_suffix|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151662": {
|
||||
"content": "<|fim_pad|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151663": {
|
||||
"content": "<|repo_name|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151664": {
|
||||
"content": "<|file_sep|>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151665": {
|
||||
"content": "<tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151666": {
|
||||
"content": "</tool_response>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151667": {
|
||||
"content": "<think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
},
|
||||
"151668": {
|
||||
"content": "</think>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false,
|
||||
"special": false
|
||||
}
|
||||
},
|
||||
"additional_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": {},
|
||||
"max_length": null,
|
||||
"model_max_length": 262144,
|
||||
"pad_to_multiple_of": null,
|
||||
"pad_token": "<|im_end|>",
|
||||
"pad_token_type_id": 0,
|
||||
"padding_side": "left",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user