初始化项目,由ModelHub XC社区提供模型
Model: Local-Axiom-AI/Chan-0.6B Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.model filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||||
|
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||||
|
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||||
|
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||||
293
README.md
Normal file
293
README.md
Normal file
@@ -0,0 +1,293 @@
|
|||||||
|
---
|
||||||
|
license: mit
|
||||||
|
language:
|
||||||
|
- en
|
||||||
|
base_model:
|
||||||
|
- Qwen/Qwen3-0.6B
|
||||||
|
library_name: transformers
|
||||||
|
tags:
|
||||||
|
- not-for-all-audiences
|
||||||
|
- nsfw
|
||||||
|
- 4chan
|
||||||
|
- uncensored
|
||||||
|
---
|
||||||
|
# Chan-0.6B Model Card
|
||||||
|
|
||||||
|
## Model Overview
|
||||||
|
|
||||||
|
**Model Name:** Chan-0.6B
|
||||||
|
**Version:** 1.0
|
||||||
|
**Model Type:** Transformer language model
|
||||||
|
**Parameter Count:** ~600 M
|
||||||
|
**Base Model:** Qwen3-0.6B (float16)
|
||||||
|
|
||||||
|
## Training Data
|
||||||
|
|
||||||
|
**Training Data:** ~200 M tokens extracted from 4chan posts (public discussion boards)
|
||||||
|
**Training Compute:** 1 × NVIDIA RTX 3090 GPU (FP16) for ~7 days
|
||||||
|
|
||||||
|
## Intended Use
|
||||||
|
|
||||||
|
Chan-0.6B is a chatbot trained on informal internet dialogue. It is suitable for:
|
||||||
|
|
||||||
|
* Low‑cost prototyping of conversational agents.
|
||||||
|
* Academic research into fine‑tuning on noisy dialogue data.
|
||||||
|
* Exploration of 4chan-style language in controlled settings.
|
||||||
|
|
||||||
|
**Not Intended For:**
|
||||||
|
|
||||||
|
* Commercial customer‑facing deployments.
|
||||||
|
* Moderation or content‑sensitive environments.
|
||||||
|
* Use cases where safe, neutral, or factually accurate output is required.
|
||||||
|
|
||||||
|
## Preprocessing
|
||||||
|
|
||||||
|
* No special filtering of offensive words.
|
||||||
|
* Long posts truncated to 1024 tokens (with special token).
|
||||||
|
* No deduplication or filtering for content quality.
|
||||||
|
|
||||||
|
## Limitations & Known Issues
|
||||||
|
|
||||||
|
* **Toxicity:** The model inherits and can amplify offensive language from the 4chan corpus.
|
||||||
|
* **Content Bias:** Strongly biased toward the linguistic style, slang, and viewpoints present on 4chan.
|
||||||
|
* **Fact-Checking:** The model was not trained on structured knowledge; outputs may contain hallucinations.
|
||||||
|
* **Safety:** No reinforcement‑learning‑from‑human‑feedback or safe‑alignment training was performed.
|
||||||
|
* **Performance:** Slower than smaller chat models due to 600 M parameters; requires a GPU for real-time inference.
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
```python
|
||||||
|
import sys
|
||||||
|
import gc
|
||||||
|
import atexit
|
||||||
|
from pathlib import Path
|
||||||
|
import torch
|
||||||
|
from transformers import AutoTokenizer, AutoModelForCausalLM
|
||||||
|
from flask import Flask, request, jsonify
|
||||||
|
from better_profanity import profanity
|
||||||
|
|
||||||
|
# ------------------ Config ------------------

# Checkpoint directory, made absolute relative to the current working directory.
CHECKPOINT_DIR = Path("./XL_V2/checkpoint-epoch3").absolute()

# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda:0")
else:
    DEVICE = torch.device("cpu")

# Token budgets.
MAX_LEN = 2048
MAX_NEW_TOKENS = 512

# Sampling settings passed to model.generate().
TEMPERATURE = 0.7
TOP_P = 0.9
TOP_K = 50
REPETITION_PENALTY = 1.18

# Whether replies are censored when a request does not say otherwise.
CENSOR = False

# Load the word list used by profanity.censor().
profanity.load_censor_words()
|
||||||
|
|
||||||
|
# ------------------ Globals ------------------
|
||||||
|
|
||||||
|
history = []
|
||||||
|
|
||||||
|
# ------------------ Load model ------------------

print(f"[{DEVICE}] Loading tokenizer & model from {CHECKPOINT_DIR} …")

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT_DIR)
# Reuse the EOS token for padding when the checkpoint defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.model_max_length = MAX_LEN

# Half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    _weights_dtype = torch.float16
else:
    _weights_dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    CHECKPOINT_DIR,
    torch_dtype=_weights_dtype,
)
model = model.to(DEVICE)
|
||||||
|
|
||||||
|
# ------------------ Cleanup on exit ------------------
|
||||||
|
|
||||||
|
@atexit.register
def cleanup():
    """Drop the model and tokenizer and free cached memory at interpreter exit.

    Removes the module-level ``model`` and ``tokenizer`` names when they are
    still bound, runs a garbage-collection pass, and empties the CUDA cache
    when CUDA is available.
    """
    print("[exit] Cleaning up resources...")
    module_scope = globals()
    for name in ("model", "tokenizer"):
        # pop() with a default deletes the name only if it is present.
        module_scope.pop(name, None)
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
|
||||||
|
|
||||||
|
# ------------------ Helpers ------------------
|
||||||
|
|
||||||
|
def truncate_tokens(encoded, max_len):
    """Left-truncate a tokenized prompt so that only the newest tokens remain.

    Args:
        encoded: Mapping with an ``"input_ids"`` tensor; only row 0 is read.
        max_len: Maximum number of trailing tokens to keep (must be >= 0).

    Returns:
        ``encoded`` itself when row 0 already fits in ``max_len`` tokens.
        Otherwise a new dict holding the last ``max_len`` ids of row 0 as
        ``"input_ids"`` and an all-ones ``"attention_mask"``, each with a
        leading batch dimension of 1.

    Raises:
        ValueError: If ``max_len`` is negative.
    """
    if max_len < 0:
        raise ValueError(f"max_len must be non-negative, got {max_len}")

    ids = encoded["input_ids"][0]
    length = ids.size(0)
    if length <= max_len:
        return encoded

    # Slice from an explicit start index: ``ids[-max_len:]`` would return the
    # whole sequence when max_len == 0, because -0 == 0.
    kept = ids[length - max_len:]
    return {
        "input_ids": kept.unsqueeze(0),
        "attention_mask": torch.ones_like(kept).unsqueeze(0),
    }
|
||||||
|
|
||||||
|
def build_prompt(history):
    """Render the conversation as ``User:`` / ``Assistant:`` lines.

    A message whose role is ``"user"`` is labelled ``User``; every other role
    is labelled ``Assistant``. The text ends with a bare ``"Assistant: "`` cue
    for the model to continue from.
    """
    rendered = [
        f"{'User' if turn['role'] == 'user' else 'Assistant'}: {turn['content']}"
        for turn in history
    ]
    return "\n".join(rendered + ["Assistant: "])
|
||||||
|
|
||||||
|
def generate_response(append_last_user=True, censor_override=None) -> str:
    """Generate the assistant's reply to the latest user message in ``history``.

    The global ``history`` is rendered with ``build_prompt``, left-truncated
    to ``MAX_LEN`` tokens, and sampled from with the module-level generation
    settings. The uncensored reply is appended to ``history``; censoring is
    applied only to the returned text.

    Args:
        append_last_user: Accepted for backward compatibility and ignored.
            The function returns early unless ``history`` already ends with a
            user message, so there is never a message left to append.
        censor_override: ``None`` to use the module default ``CENSOR``;
            otherwise its truthiness decides whether the returned text is
            passed through ``profanity.censor``.

    Returns:
        The reply text, or ``""`` when ``history`` is empty or does not end
        with a user message.
    """
    if not history or history[-1]["role"] != "user":
        return ""

    last_user = history[-1]["content"]

    tokens = tokenizer(build_prompt(history), return_tensors="pt", truncation=False)
    tokens = truncate_tokens(tokens, MAX_LEN)
    tokens = {k: v.to(DEVICE) for k, v in tokens.items()}

    out_ids = model.generate(
        **tokens,
        max_new_tokens=MAX_NEW_TOKENS,
        min_new_tokens=80,
        do_sample=True,
        temperature=TEMPERATURE,
        top_p=TOP_P,
        top_k=TOP_K,
        repetition_penalty=REPETITION_PENALTY,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )

    # Decode only the newly generated tokens. Splitting the full decoded text
    # on the last "Assistant:" drops the start of the reply whenever the model
    # itself emits that marker.
    prompt_len = tokens["input_ids"].shape[1]
    decoded = tokenizer.decode(out_ids[0][prompt_len:], skip_special_tokens=False)
    assistant_reply = decoded.replace("<|endoftext|>", "").strip()

    # Anti-echo: strip a leading or quoted copy of the user's message.
    if assistant_reply.startswith(last_user):
        assistant_reply = assistant_reply[len(last_user):].lstrip()
    if last_user[:20] in assistant_reply[:60]:
        assistant_reply = assistant_reply.replace(last_user, "").strip()
    if assistant_reply.startswith(">") and last_user[:20] in assistant_reply:
        # Drop a first line that quotes the user ("> ..." style).
        assistant_reply = assistant_reply.split("\n", 1)[-1].strip()
    assistant_reply = assistant_reply.strip()

    # Store the raw (uncensored) reply so later prompts see what was generated.
    history.append({"role": "assistant", "content": assistant_reply})

    # Censor only the text handed back to the caller, never the stored history.
    do_censor = CENSOR if censor_override is None else bool(censor_override)
    if do_censor:
        return profanity.censor(assistant_reply)
    return assistant_reply
|
||||||
|
|
||||||
|
# ------------------ Flask app ------------------
|
||||||
|
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
@app.route("/generate", methods=["POST"])
def generate_endpoint():
    """Handle ``POST /generate`` and return the model's reply as JSON.

    Accepted request bodies:

    * JSON object with a ``"messages"`` list: the global ``history`` is
      replaced by the entries that are objects with both ``"role"`` and
      ``"content"``; other entries are skipped.
    * JSON object with a ``"prompt"`` value, or a bare JSON string: the text
      is appended to the global ``history`` as a user message.
    * Any body that is not parsed as JSON: the raw text is used as the prompt.

    A JSON object may also carry ``"censor"`` to override the default censor
    setting for this request.

    Returns:
        ``{"response": ...}`` on success, ``{"error": "Missing prompt"}`` with
        status 400 when no prompt text is found, or
        ``{"error": "Internal server error"}`` with status 500 when generation
        raises.
    """
    global history

    data = request.get_json(silent=True)
    censor_flag = data.get("censor") if isinstance(data, dict) else None

    if isinstance(data, dict) and isinstance(data.get("messages"), list):
        # ChatML-like messages mode: the request carries the whole conversation.
        history = [
            {"role": msg["role"], "content": msg["content"]}
            for msg in data["messages"]
            if isinstance(msg, dict) and "role" in msg and "content" in msg
        ]
    else:
        # Simple prompt mode.
        if isinstance(data, dict):
            raw_prompt = data.get("prompt")
            prompt = "" if raw_prompt is None else str(raw_prompt).strip()
        elif isinstance(data, str):
            prompt = data.strip()
        elif data is None:
            # Body was not parsed as JSON: treat it as a plain-text prompt.
            # Undecodable bytes are replaced instead of raising.
            prompt = request.data.decode("utf-8", errors="replace").strip()
        else:
            prompt = ""

        if not prompt:
            return jsonify({"error": "Missing prompt"}), 400

        history.append({"role": "user", "content": prompt})

    # Both modes share one error path, so a generation failure is reported as
    # a 500 instead of being swallowed and retried with the raw request body.
    try:
        answer = generate_response(
            append_last_user=False,
            censor_override=censor_flag,
        )
    except Exception as exc:
        sys.stderr.write(str(exc) + "\n")
        return jsonify({"error": "Internal server error"}), 500

    return jsonify({"response": answer})
|
||||||
|
|
||||||
|
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe: always answers with ``{"status": "ok"}``."""
    payload = {"status": "ok"}
    return jsonify(payload)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import argparse

    # Command-line options select the address and port the Flask app binds to.
    cli = argparse.ArgumentParser(description="Run Flask LLM API")
    cli.add_argument("--host", default="0.0.0.0", help="Hostname")
    cli.add_argument("--port", type=int, default=8000, help="Port")
    options = cli.parse_args()

    app.run(host=options.host, port=options.port)
|
||||||
|
```
|
||||||
|
|
||||||
|
**Hardware:**
|
||||||
|
|
||||||
|
* Requires at least a single 12 GB GPU for inference. CPU inference is possible but slow.
|
||||||
|
|
||||||
|
**Safety:**
|
||||||
|
|
||||||
|
* We strongly recommend applying a toxicity filter or reinforcement learning-based safety policy before deployment.
|
||||||
|
|
||||||
|
## License
|
||||||
|
|
||||||
|
MIT (see [LICENSE file](LICENSE))
|
||||||
|
|
||||||
|
## Disclaimer
|
||||||
|
|
||||||
|
This model was trained on user-generated internet content that includes hate speech, harassment, and extremist viewpoints. The creators do not endorse or support these views. The model should be used responsibly, with proper safety safeguards, and in compliance with all local laws and platform policies.
|
||||||
28
added_tokens.json
Normal file
28
added_tokens.json
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
{
|
||||||
|
"</think>": 151668,
|
||||||
|
"</tool_call>": 151658,
|
||||||
|
"</tool_response>": 151666,
|
||||||
|
"<think>": 151667,
|
||||||
|
"<tool_call>": 151657,
|
||||||
|
"<tool_response>": 151665,
|
||||||
|
"<|box_end|>": 151649,
|
||||||
|
"<|box_start|>": 151648,
|
||||||
|
"<|endoftext|>": 151643,
|
||||||
|
"<|file_sep|>": 151664,
|
||||||
|
"<|fim_middle|>": 151660,
|
||||||
|
"<|fim_pad|>": 151662,
|
||||||
|
"<|fim_prefix|>": 151659,
|
||||||
|
"<|fim_suffix|>": 151661,
|
||||||
|
"<|im_end|>": 151645,
|
||||||
|
"<|im_start|>": 151644,
|
||||||
|
"<|image_pad|>": 151655,
|
||||||
|
"<|object_ref_end|>": 151647,
|
||||||
|
"<|object_ref_start|>": 151646,
|
||||||
|
"<|quad_end|>": 151651,
|
||||||
|
"<|quad_start|>": 151650,
|
||||||
|
"<|repo_name|>": 151663,
|
||||||
|
"<|video_pad|>": 151656,
|
||||||
|
"<|vision_end|>": 151653,
|
||||||
|
"<|vision_pad|>": 151654,
|
||||||
|
"<|vision_start|>": 151652
|
||||||
|
}
|
||||||
85
chat_template.jinja
Normal file
85
chat_template.jinja
Normal file
@@ -0,0 +1,85 @@
|
|||||||
|
{%- if tools %}
|
||||||
|
{{- '<|im_start|>system\n' }}
|
||||||
|
{%- if messages[0].role == 'system' %}
|
||||||
|
{{- messages[0].content + '\n\n' }}
|
||||||
|
{%- endif %}
|
||||||
|
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||||
|
{%- for tool in tools %}
|
||||||
|
{{- "\n" }}
|
||||||
|
{{- tool | tojson }}
|
||||||
|
{%- endfor %}
|
||||||
|
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||||
|
{%- else %}
|
||||||
|
{%- if messages[0].role == 'system' %}
|
||||||
|
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||||
|
{%- endif %}
|
||||||
|
{%- endif %}
|
||||||
|
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||||
|
{%- for message in messages[::-1] %}
|
||||||
|
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||||
|
{%- if ns.multi_step_tool and message.role == "user" and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
|
||||||
|
{%- set ns.multi_step_tool = false %}
|
||||||
|
{%- set ns.last_query_index = index %}
|
||||||
|
{%- endif %}
|
||||||
|
{%- endfor %}
|
||||||
|
{%- for message in messages %}
|
||||||
|
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||||
|
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
||||||
|
{%- elif message.role == "assistant" %}
|
||||||
|
{%- set content = message.content %}
|
||||||
|
{%- set reasoning_content = '' %}
|
||||||
|
{%- if message.reasoning_content is defined and message.reasoning_content is not none %}
|
||||||
|
{%- set reasoning_content = message.reasoning_content %}
|
||||||
|
{%- else %}
|
||||||
|
{%- if '</think>' in message.content %}
|
||||||
|
{%- set content = message.content.split('</think>')[-1].lstrip('\n') %}
|
||||||
|
{%- set reasoning_content = message.content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
||||||
|
{%- endif %}
|
||||||
|
{%- endif %}
|
||||||
|
{%- if loop.index0 > ns.last_query_index %}
|
||||||
|
{%- if loop.last or (not loop.last and reasoning_content) %}
|
||||||
|
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||||
|
{%- else %}
|
||||||
|
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||||
|
{%- endif %}
|
||||||
|
{%- else %}
|
||||||
|
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||||
|
{%- endif %}
|
||||||
|
{%- if message.tool_calls %}
|
||||||
|
{%- for tool_call in message.tool_calls %}
|
||||||
|
{%- if (loop.first and content) or (not loop.first) %}
|
||||||
|
{{- '\n' }}
|
||||||
|
{%- endif %}
|
||||||
|
{%- if tool_call.function %}
|
||||||
|
{%- set tool_call = tool_call.function %}
|
||||||
|
{%- endif %}
|
||||||
|
{{- '<tool_call>\n{"name": "' }}
|
||||||
|
{{- tool_call.name }}
|
||||||
|
{{- '", "arguments": ' }}
|
||||||
|
{%- if tool_call.arguments is string %}
|
||||||
|
{{- tool_call.arguments }}
|
||||||
|
{%- else %}
|
||||||
|
{{- tool_call.arguments | tojson }}
|
||||||
|
{%- endif %}
|
||||||
|
{{- '}\n</tool_call>' }}
|
||||||
|
{%- endfor %}
|
||||||
|
{%- endif %}
|
||||||
|
{{- '<|im_end|>\n' }}
|
||||||
|
{%- elif message.role == "tool" %}
|
||||||
|
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||||
|
{{- '<|im_start|>user' }}
|
||||||
|
{%- endif %}
|
||||||
|
{{- '\n<tool_response>\n' }}
|
||||||
|
{{- message.content }}
|
||||||
|
{{- '\n</tool_response>' }}
|
||||||
|
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||||
|
{{- '<|im_end|>\n' }}
|
||||||
|
{%- endif %}
|
||||||
|
{%- endif %}
|
||||||
|
{%- endfor %}
|
||||||
|
{%- if add_generation_prompt %}
|
||||||
|
{{- '<|im_start|>assistant\n' }}
|
||||||
|
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||||
|
{{- '<think>\n\n</think>\n\n' }}
|
||||||
|
{%- endif %}
|
||||||
|
{%- endif %}
|
||||||
60
config.json
Normal file
60
config.json
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
{
|
||||||
|
"architectures": [
|
||||||
|
"Qwen3ForCausalLM"
|
||||||
|
],
|
||||||
|
"attention_bias": false,
|
||||||
|
"attention_dropout": 0.0,
|
||||||
|
"bos_token_id": 151643,
|
||||||
|
"dtype": "float32",
|
||||||
|
"eos_token_id": 151643,
|
||||||
|
"head_dim": 128,
|
||||||
|
"hidden_act": "silu",
|
||||||
|
"hidden_size": 1024,
|
||||||
|
"initializer_range": 0.02,
|
||||||
|
"intermediate_size": 3072,
|
||||||
|
"layer_types": [
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention",
|
||||||
|
"full_attention"
|
||||||
|
],
|
||||||
|
"max_position_embeddings": 32768,
|
||||||
|
"max_window_layers": 28,
|
||||||
|
"model_type": "qwen3",
|
||||||
|
"num_attention_heads": 16,
|
||||||
|
"num_hidden_layers": 28,
|
||||||
|
"num_key_value_heads": 8,
|
||||||
|
"rms_norm_eps": 1e-06,
|
||||||
|
"rope_scaling": null,
|
||||||
|
"rope_theta": 1000000,
|
||||||
|
"sliding_window": null,
|
||||||
|
"tie_word_embeddings": true,
|
||||||
|
"transformers_version": "4.57.1",
|
||||||
|
"use_cache": true,
|
||||||
|
"use_sliding_window": false,
|
||||||
|
"vocab_size": 151669
|
||||||
|
}
|
||||||
6
generation_config.json
Normal file
6
generation_config.json
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
{
|
||||||
|
"bos_token_id": 151643,
|
||||||
|
"eos_token_id": 151643,
|
||||||
|
"max_new_tokens": 2048,
|
||||||
|
"transformers_version": "4.57.1"
|
||||||
|
}
|
||||||
151388
merges.txt
Normal file
151388
merges.txt
Normal file
File diff suppressed because it is too large
Load Diff
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:9a5dd3b5202305e2cf5032227d766158099143c08496c4b9e7a6e8b9d7e2b8cf
|
||||||
|
size 2383141336
|
||||||
25
special_tokens_map.json
Normal file
25
special_tokens_map.json
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
{
|
||||||
|
"additional_special_tokens": [
|
||||||
|
"<|im_start|>",
|
||||||
|
"<|im_end|>",
|
||||||
|
"<|object_ref_start|>",
|
||||||
|
"<|object_ref_end|>",
|
||||||
|
"<|box_start|>",
|
||||||
|
"<|box_end|>",
|
||||||
|
"<|quad_start|>",
|
||||||
|
"<|quad_end|>",
|
||||||
|
"<|vision_start|>",
|
||||||
|
"<|vision_end|>",
|
||||||
|
"<|vision_pad|>",
|
||||||
|
"<|image_pad|>",
|
||||||
|
"<|video_pad|>"
|
||||||
|
],
|
||||||
|
"eos_token": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false
|
||||||
|
},
|
||||||
|
"pad_token": "<|endoftext|>"
|
||||||
|
}
|
||||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
version https://git-lfs.github.com/spec/v1
|
||||||
|
oid sha256:c760c4e715b8fa39254607191eca619d9b0612d5b29d3002ac512a5b6cad7d55
|
||||||
|
size 11422934
|
||||||
239
tokenizer_config.json
Normal file
239
tokenizer_config.json
Normal file
@@ -0,0 +1,239 @@
|
|||||||
|
{
|
||||||
|
"add_bos_token": false,
|
||||||
|
"add_prefix_space": false,
|
||||||
|
"added_tokens_decoder": {
|
||||||
|
"151643": {
|
||||||
|
"content": "<|endoftext|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151644": {
|
||||||
|
"content": "<|im_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151645": {
|
||||||
|
"content": "<|im_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151646": {
|
||||||
|
"content": "<|object_ref_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151647": {
|
||||||
|
"content": "<|object_ref_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151648": {
|
||||||
|
"content": "<|box_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151649": {
|
||||||
|
"content": "<|box_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151650": {
|
||||||
|
"content": "<|quad_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151651": {
|
||||||
|
"content": "<|quad_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151652": {
|
||||||
|
"content": "<|vision_start|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151653": {
|
||||||
|
"content": "<|vision_end|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151654": {
|
||||||
|
"content": "<|vision_pad|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151655": {
|
||||||
|
"content": "<|image_pad|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151656": {
|
||||||
|
"content": "<|video_pad|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": true
|
||||||
|
},
|
||||||
|
"151657": {
|
||||||
|
"content": "<tool_call>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"151658": {
|
||||||
|
"content": "</tool_call>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"151659": {
|
||||||
|
"content": "<|fim_prefix|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"151660": {
|
||||||
|
"content": "<|fim_middle|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"151661": {
|
||||||
|
"content": "<|fim_suffix|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"151662": {
|
||||||
|
"content": "<|fim_pad|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"151663": {
|
||||||
|
"content": "<|repo_name|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"151664": {
|
||||||
|
"content": "<|file_sep|>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"151665": {
|
||||||
|
"content": "<tool_response>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"151666": {
|
||||||
|
"content": "</tool_response>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"151667": {
|
||||||
|
"content": "<think>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
},
|
||||||
|
"151668": {
|
||||||
|
"content": "</think>",
|
||||||
|
"lstrip": false,
|
||||||
|
"normalized": false,
|
||||||
|
"rstrip": false,
|
||||||
|
"single_word": false,
|
||||||
|
"special": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additional_special_tokens": [
|
||||||
|
"<|im_start|>",
|
||||||
|
"<|im_end|>",
|
||||||
|
"<|object_ref_start|>",
|
||||||
|
"<|object_ref_end|>",
|
||||||
|
"<|box_start|>",
|
||||||
|
"<|box_end|>",
|
||||||
|
"<|quad_start|>",
|
||||||
|
"<|quad_end|>",
|
||||||
|
"<|vision_start|>",
|
||||||
|
"<|vision_end|>",
|
||||||
|
"<|vision_pad|>",
|
||||||
|
"<|image_pad|>",
|
||||||
|
"<|video_pad|>"
|
||||||
|
],
|
||||||
|
"bos_token": null,
|
||||||
|
"clean_up_tokenization_spaces": false,
|
||||||
|
"eos_token": "<|endoftext|>",
|
||||||
|
"errors": "replace",
|
||||||
|
"extra_special_tokens": {},
|
||||||
|
"model_max_length": 1024,
|
||||||
|
"pad_token": "<|endoftext|>",
|
||||||
|
"split_special_tokens": false,
|
||||||
|
"tokenizer_class": "Qwen2Tokenizer",
|
||||||
|
"unk_token": null
|
||||||
|
}
|
||||||
1
vocab.json
Normal file
1
vocab.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user