初始化项目,由ModelHub XC社区提供模型
Model: RthItalia/NanoLLM-Qwen2.5-14B-v3.1 Source: Original Platform
This commit is contained in:
54
nano_compact/chat_template.jinja
Normal file
54
nano_compact/chat_template.jinja
Normal file
@@ -0,0 +1,54 @@
|
||||
{#- Opening system turn. When tools are supplied, the tool signatures and the calling convention are appended to the system prompt. #}
{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0]['role'] == 'system' %}
        {{- messages[0]['content'] }}
    {%- else %}
        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
    {%- endif %}
    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0]['role'] == 'system' %}
        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
    {%- else %}
        {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{#- Conversation turns. A system message in first position matches no branch here because it was already emitted above. #}
{%- for message in messages %}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
    {%- elif message.role == "assistant" %}
        {#- Assistant turn carrying tool calls: optional text followed by one <tool_call> block per call. #}
        {{- '<|im_start|>' + message.role }}
        {%- if message.content %}
            {{- '\n' + message.content }}
        {%- endif %}
        {%- for tool_call in message.tool_calls %}
            {#- Accept both the wrapped form (tool_call.function) and the flat form. #}
            {%- if tool_call.function is defined %}
                {%- set tool_call = tool_call.function %}
            {%- endif %}
            {{- '\n<tool_call>\n{"name": "' }}
            {{- tool_call.name }}
            {{- '", "arguments": ' }}
            {{- tool_call.arguments | tojson }}
            {{- '}\n</tool_call>' }}
        {%- endfor %}
        {{- '<|im_end|>\n' }}
    {%- elif message.role == "tool" %}
        {#- Consecutive tool results are merged into a single user turn: open it on the first, close it on the last. #}
        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\n<tool_response>\n' }}
        {{- message.content }}
        {{- '\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\n' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{#- Optionally open the assistant turn so generation continues from it. #}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
{%- endif %}
|
||||
2231
nano_compact/config.json
Normal file
2231
nano_compact/config.json
Normal file
File diff suppressed because it is too large
Load Diff
3
nano_compact/model.safetensors
Normal file
3
nano_compact/model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d052d5b20bebde4836c9559d5c9baf6b65ad2af02bb10da9d23a7416a10e6970
|
||||
size 14779293592
|
||||
118
nano_compact/modeling_nanollm.py
Normal file
118
nano_compact/modeling_nanollm.py
Normal file
@@ -0,0 +1,118 @@
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from transformers.models.qwen2.configuration_qwen2 import Qwen2Config
|
||||
from transformers.models.qwen2.modeling_qwen2 import Qwen2ForCausalLM
|
||||
|
||||
class NanoInt8Linear(nn.Module):
|
||||
def __init__(self, in_features, out_features, has_bias=False):
|
||||
super().__init__()
|
||||
self.in_features = int(in_features)
|
||||
self.out_features = int(out_features)
|
||||
self.has_bias = bool(has_bias)
|
||||
self.register_buffer("q", torch.empty((self.out_features, self.in_features), dtype=torch.int8))
|
||||
self.register_buffer("scale", torch.empty((self.out_features,), dtype=torch.float16))
|
||||
if self.has_bias:
|
||||
self.register_buffer("bias", torch.empty((self.out_features,), dtype=torch.float16))
|
||||
|
||||
def forward(self, x):
|
||||
dt = x.dtype
|
||||
f = x.to(torch.float16).reshape(-1, x.shape[-1])
|
||||
w = self.q.to(f.device, torch.float16) * self.scale.to(f.device).unsqueeze(1)
|
||||
y = f @ w.t()
|
||||
if self.has_bias:
|
||||
y = y + self.bias.to(f.device)
|
||||
return y.reshape(*x.shape[:-1], self.out_features).to(dt)
|
||||
|
||||
class NanoTrueQuantLinear(nn.Module):
|
||||
def __init__(self, in_features, out_features, prot_rows, deg_rows, has_bias=False):
|
||||
super().__init__()
|
||||
self.in_features = int(in_features)
|
||||
self.out_features = int(out_features)
|
||||
self.has_bias = bool(has_bias)
|
||||
self.register_buffer("prot_q", torch.empty((prot_rows, self.in_features), dtype=torch.int8))
|
||||
self.register_buffer("prot_scale", torch.empty((prot_rows,), dtype=torch.float16))
|
||||
self.register_buffer("prot_idx", torch.empty((prot_rows,), dtype=torch.long))
|
||||
self.register_buffer("deg_q", torch.empty((deg_rows, self.in_features), dtype=torch.int8))
|
||||
self.register_buffer("deg_scale", torch.empty((deg_rows,), dtype=torch.float16))
|
||||
self.register_buffer("deg_idx", torch.empty((deg_rows,), dtype=torch.long))
|
||||
if self.has_bias:
|
||||
self.register_buffer("bias", torch.empty((self.out_features,), dtype=torch.float16))
|
||||
|
||||
def forward(self, x):
|
||||
dt = x.dtype
|
||||
f = x.to(torch.float16).reshape(-1, x.shape[-1])
|
||||
y = torch.zeros((f.shape[0], self.out_features), dtype=torch.float16, device=f.device)
|
||||
if self.prot_q.shape[0] > 0:
|
||||
w = self.prot_q.to(f.device, torch.float16) * self.prot_scale.to(f.device).unsqueeze(1)
|
||||
y.index_copy_(-1, self.prot_idx.to(f.device), f @ w.t())
|
||||
if self.deg_q.shape[0] > 0:
|
||||
w = self.deg_q.to(f.device, torch.float16) * self.deg_scale.to(f.device).unsqueeze(1)
|
||||
y.index_copy_(-1, self.deg_idx.to(f.device), f @ w.t())
|
||||
if self.has_bias:
|
||||
y = y + self.bias.to(f.device)
|
||||
return y.reshape(*x.shape[:-1], self.out_features).to(dt)
|
||||
|
||||
class NanoEmbedding(nn.Module):
    """Embedding table stored as int8 rows with one float16 scale per row.

    Buffers:
        q: int8 tensor of shape (num_embeddings, embedding_dim).
        scale: float16 tensor of shape (num_embeddings,).

    Both buffers are created with ``torch.empty``, so their contents are
    undefined until real values are assigned or loaded.
    """

    def __init__(self, num_embeddings, embedding_dim):
        super().__init__()
        self.num_embeddings = int(num_embeddings)
        self.embedding_dim = int(embedding_dim)
        table_shape = (self.num_embeddings, self.embedding_dim)
        scale_shape = (self.num_embeddings,)
        self.register_buffer("q", torch.empty(table_shape, dtype=torch.int8))
        self.register_buffer("scale", torch.empty(scale_shape, dtype=torch.float16))

    def forward(self, input_ids):
        """Return float16 vectors for ``input_ids``: the selected int8 rows times their row scales."""
        rows = self.q[input_ids].to(torch.float16)
        row_scale = self.scale[input_ids].to(torch.float16)
        # unsqueeze(-1) lets each per-row scale broadcast across embedding_dim.
        return rows * row_scale.unsqueeze(-1)
|
||||
|
||||
|
||||
|
||||
class NanoTiedLMHead(nn.Module):
|
||||
def __init__(self, embedding):
|
||||
super().__init__()
|
||||
self.register_buffer("q", embedding.q.detach().clone())
|
||||
self.register_buffer("scale", embedding.scale.detach().clone())
|
||||
|
||||
def forward(self, x):
|
||||
w = self.q.to(x.device, torch.float16) * self.scale.to(x.device).unsqueeze(1)
|
||||
return x.to(torch.float16) @ w.t()
|
||||
|
||||
def _set_module(root, name, module):
|
||||
cur = root
|
||||
parts = name.split(".")
|
||||
for p in parts[:-1]:
|
||||
cur = cur[int(p)] if p.isdigit() else getattr(cur, p)
|
||||
setattr(cur, parts[-1], module)
|
||||
|
||||
class NanoQwenForCausalLM(Qwen2ForCausalLM):
    """Qwen2 causal LM whose submodules are swapped for Nano quantized modules.

    After the regular ``Qwen2ForCausalLM`` construction, every entry of
    ``config.nanollm_modules`` (a mapping from dotted module path to a spec
    dict; empty when the attribute is absent) is turned into a Nano module and
    installed at that path. Weight tying is switched off on the config.
    """

    config_class = Qwen2Config

    def tie_weights(self, *args, **kwargs):
        """Do nothing and return None; this model does not tie weights."""
        return None

    def mark_tied_weights_as_initialized(self, *args, **kwargs):
        """Do nothing and return None (presumably a base-class loading hook; confirm against the installed transformers version)."""
        return None

    @staticmethod
    def _build_nano_module(spec):
        """Instantiate the Nano module described by ``spec``; raise ValueError on an unknown ``kind``."""
        kind = spec["kind"]
        if kind == "embedding":
            return NanoEmbedding(spec["num_embeddings"], spec["embedding_dim"])
        if kind == "int8_linear":
            return NanoInt8Linear(spec["in_features"], spec["out_features"], spec.get("has_bias", False))
        if kind == "truequant_linear":
            return NanoTrueQuantLinear(
                spec["in_features"], spec["out_features"],
                spec["prot_rows"], spec["deg_rows"],
                spec.get("has_bias", False),
            )
        raise ValueError(f"Unknown Nano module kind: {kind}")

    def __init__(self, config):
        # Note: this mutates the caller's config object before the base
        # constructor reads it.
        config.tie_word_embeddings = False
        super().__init__(config)
        self.config.tie_word_embeddings = False
        self._tied_weights_keys = []
        self.all_tied_weights_keys = {}

        module_specs = getattr(config, "nanollm_modules", {})
        for path, spec in module_specs.items():
            replacement = self._build_nano_module(spec)
            _set_module(self, path, replacement)

        # With a quantized embedding and no explicit lm_head spec, build the
        # head from the embedding module.
        if "lm_head" not in module_specs and isinstance(self.model.embed_tokens, NanoEmbedding):
            self.lm_head = NanoTiedLMHead(self.model.embed_tokens)
|
||||
6
nano_compact/nano_compact_spec.json
Normal file
6
nano_compact/nano_compact_spec.json
Normal file
@@ -0,0 +1,6 @@
|
||||
{
|
||||
"format": "compact-safetensors-v1",
|
||||
"base_model_id": "Qwen/Qwen2.5-14B-Instruct",
|
||||
"artifact_dir": "/workspace/nano_rebuild/runs_14b/099/final_artifact_Qwen2.5-14B-Instruct",
|
||||
"requires_trust_remote_code": true
|
||||
}
|
||||
3
nano_compact/tokenizer.json
Normal file
3
nano_compact/tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:3fd169731d2cbde95e10bf356d66d5997fd885dd8dbb6fb4684da3f23b2585d8
|
||||
size 11421892
|
||||
30
nano_compact/tokenizer_config.json
Normal file
30
nano_compact/tokenizer_config.json
Normal file
@@ -0,0 +1,30 @@
|
||||
{
|
||||
"add_prefix_space": false,
|
||||
"backend": "tokenizers",
|
||||
"bos_token": null,
|
||||
"clean_up_tokenization_spaces": false,
|
||||
"eos_token": "<|im_end|>",
|
||||
"errors": "replace",
|
||||
"extra_special_tokens": [
|
||||
"<|im_start|>",
|
||||
"<|im_end|>",
|
||||
"<|object_ref_start|>",
|
||||
"<|object_ref_end|>",
|
||||
"<|box_start|>",
|
||||
"<|box_end|>",
|
||||
"<|quad_start|>",
|
||||
"<|quad_end|>",
|
||||
"<|vision_start|>",
|
||||
"<|vision_end|>",
|
||||
"<|vision_pad|>",
|
||||
"<|image_pad|>",
|
||||
"<|video_pad|>"
|
||||
],
|
||||
"is_local": true,
|
||||
"local_files_only": false,
|
||||
"model_max_length": 131072,
|
||||
"pad_token": "<|endoftext|>",
|
||||
"split_special_tokens": false,
|
||||
"tokenizer_class": "Qwen2Tokenizer",
|
||||
"unk_token": null
|
||||
}
|
||||
Reference in New Issue
Block a user