初始化项目,由ModelHub XC社区提供模型
Model: MihaiPopa-1/OmniTranslate-1.1 Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
628
README.md
Normal file
628
README.md
Normal file
@@ -0,0 +1,628 @@
|
||||
---
|
||||
base_model: MihaiPopa-1/OmniTranslate-1.0
|
||||
# base_model: Unsloth/Qwen3-0.6B-Unsloth-bnb-4bit - Variant that I used for fine-tuning (4-bit BNB quant by Unsloth)
|
||||
tags:
|
||||
- text-generation-inference
|
||||
- translation
|
||||
- transformers
|
||||
- unsloth
|
||||
- qwen3
|
||||
- omnitranslate
|
||||
license: apache-2.0
|
||||
language:
|
||||
- abk
|
||||
- abq
|
||||
- abs
|
||||
- acm
|
||||
- adh
|
||||
- adi
|
||||
- ady
|
||||
- aeb
|
||||
- afr
|
||||
- agx
|
||||
- aii
|
||||
- aim
|
||||
- ain
|
||||
- ajz
|
||||
- akb
|
||||
- aln
|
||||
- als
|
||||
- alt
|
||||
- amh
|
||||
- anp
|
||||
- aoz
|
||||
- apc
|
||||
- apt
|
||||
- arb
|
||||
- arg
|
||||
- arq
|
||||
- ars
|
||||
- ary
|
||||
- arz
|
||||
- asm
|
||||
- ast
|
||||
- atb
|
||||
- ava
|
||||
- awa
|
||||
- ayp
|
||||
- ayr
|
||||
- azb
|
||||
- azj
|
||||
- bak
|
||||
- bam
|
||||
- ban
|
||||
- bar
|
||||
- bas
|
||||
- bbc
|
||||
- bbk
|
||||
- bcl
|
||||
- bdq
|
||||
- bel
|
||||
- ben
|
||||
- bew
|
||||
- bho
|
||||
- bhp
|
||||
- bis
|
||||
- biu
|
||||
- bjn
|
||||
- bod
|
||||
- bos
|
||||
- brh
|
||||
- brx
|
||||
- bts
|
||||
- btx
|
||||
- bug
|
||||
- bul
|
||||
- bwi
|
||||
- bxr
|
||||
- cat
|
||||
- cbk
|
||||
- ccp
|
||||
- ceb
|
||||
- ces
|
||||
- cfm
|
||||
- cha
|
||||
- che
|
||||
- chr
|
||||
- chu
|
||||
- chv
|
||||
- cjs
|
||||
- ckb
|
||||
- ckt
|
||||
- cmn
|
||||
- cnh
|
||||
- cnw
|
||||
- cos
|
||||
- crh
|
||||
- crj
|
||||
- crk
|
||||
- crl
|
||||
- crs
|
||||
- csb
|
||||
- csw
|
||||
- csy
|
||||
- ctd
|
||||
- cym
|
||||
- czt
|
||||
- dak
|
||||
- dan
|
||||
- dar
|
||||
- deu
|
||||
- dik
|
||||
- diu
|
||||
- div
|
||||
- dje
|
||||
- dks
|
||||
- dln
|
||||
- dng
|
||||
- dnw
|
||||
- doi
|
||||
- dru
|
||||
- dsb
|
||||
- dtp
|
||||
- dty
|
||||
- dzo
|
||||
- ekk
|
||||
- ell
|
||||
- emj
|
||||
- enl
|
||||
- enm
|
||||
- epo
|
||||
- ess
|
||||
- eus
|
||||
- eve
|
||||
- ewo
|
||||
- ext
|
||||
- fao
|
||||
- fas
|
||||
- ffm
|
||||
- fij
|
||||
- fil
|
||||
- fin
|
||||
- fit
|
||||
- fkv
|
||||
- fmu
|
||||
- fra
|
||||
- fro
|
||||
- frp
|
||||
- fry
|
||||
- fuf
|
||||
- fur
|
||||
- fuv
|
||||
- gag
|
||||
- gaz
|
||||
- gcf
|
||||
- gla
|
||||
- gle
|
||||
- glg
|
||||
- glk
|
||||
- glv
|
||||
- gmh
|
||||
- gnb
|
||||
- goh
|
||||
- gom
|
||||
- gos
|
||||
- grc
|
||||
- gsw
|
||||
- gug
|
||||
- guj
|
||||
- guz
|
||||
- hac
|
||||
- hae
|
||||
- hak
|
||||
- hat
|
||||
- hau
|
||||
- haw
|
||||
- hbo
|
||||
- heb
|
||||
- her
|
||||
- hif
|
||||
- hil
|
||||
- hin
|
||||
- hmr
|
||||
- hne
|
||||
- hns
|
||||
- hrv
|
||||
- hrx
|
||||
- hsb
|
||||
- hun
|
||||
- hwc
|
||||
- hye
|
||||
- hyw
|
||||
- iba
|
||||
- ibg
|
||||
- ibo
|
||||
- ife
|
||||
- ike
|
||||
- ikt
|
||||
- ilo
|
||||
- ina
|
||||
- ind
|
||||
- inh
|
||||
- isl
|
||||
- ita
|
||||
- ivv
|
||||
- jav
|
||||
- jpn
|
||||
- jun
|
||||
- kaa
|
||||
- kab
|
||||
- kac
|
||||
- kak
|
||||
- kal
|
||||
- kam
|
||||
- kan
|
||||
- kas
|
||||
- kat
|
||||
- kaz
|
||||
- kbd
|
||||
- kca
|
||||
- kdh
|
||||
- kdr
|
||||
- kea
|
||||
- kei
|
||||
- kgp
|
||||
- kha
|
||||
- khk
|
||||
- khm
|
||||
- kik
|
||||
- kin
|
||||
- kir
|
||||
- kiu
|
||||
- kjb
|
||||
- kjh
|
||||
- kmr
|
||||
- knc
|
||||
- koi
|
||||
- kor
|
||||
- kos
|
||||
- kpv
|
||||
- krj
|
||||
- krl
|
||||
- kru
|
||||
- ksh
|
||||
- ksw
|
||||
- ktj
|
||||
- ktz
|
||||
- kua
|
||||
- kum
|
||||
- kwn
|
||||
- kyu
|
||||
- kzj
|
||||
- lad
|
||||
- lao
|
||||
- lat
|
||||
- lbe
|
||||
- ldn
|
||||
- lew
|
||||
- lez
|
||||
- lfn
|
||||
- lim
|
||||
- lin
|
||||
- lis
|
||||
- lit
|
||||
- lki
|
||||
- lld
|
||||
- lmk
|
||||
- lnd
|
||||
- lrc
|
||||
- ltg
|
||||
- ltz
|
||||
- lud
|
||||
- lug
|
||||
- luo
|
||||
- lus
|
||||
- lvs
|
||||
- lwg
|
||||
- lzh
|
||||
- mag
|
||||
- mah
|
||||
- mai
|
||||
- mak
|
||||
- mal
|
||||
- mar
|
||||
- mas
|
||||
- mbf
|
||||
- mdf
|
||||
- mer
|
||||
- mfe
|
||||
- mfg
|
||||
- mfy
|
||||
- mhi
|
||||
- mhr
|
||||
- mhy
|
||||
- min
|
||||
- mip
|
||||
- mjw
|
||||
- mkd
|
||||
- mlt
|
||||
- mni
|
||||
- mnk
|
||||
- mns
|
||||
- mnw
|
||||
- moh
|
||||
- mph
|
||||
- mqy
|
||||
- mri
|
||||
- mrj
|
||||
- mrw
|
||||
- mtg
|
||||
- mui
|
||||
- mup
|
||||
- mus
|
||||
- mvp
|
||||
- mwf
|
||||
- mwl
|
||||
- mww
|
||||
- mya
|
||||
- myv
|
||||
- myx
|
||||
- mzh
|
||||
- nah
|
||||
- nan
|
||||
- nap
|
||||
- naq
|
||||
- nbu
|
||||
- nde
|
||||
- ndo
|
||||
- nds
|
||||
- new
|
||||
- nio
|
||||
- njn
|
||||
- njo
|
||||
- nld
|
||||
- nmf
|
||||
- nmz
|
||||
- nno
|
||||
- nob
|
||||
- nog
|
||||
- non
|
||||
- npi
|
||||
- npo
|
||||
- nrf
|
||||
- nri
|
||||
- nrm
|
||||
- nse
|
||||
- nus
|
||||
- nya
|
||||
- nyn
|
||||
- nzm
|
||||
- obo
|
||||
- oci
|
||||
- ojb
|
||||
- olo
|
||||
- orv
|
||||
- ory
|
||||
- oss
|
||||
- ota
|
||||
- oto
|
||||
- otw
|
||||
- pam
|
||||
- pan
|
||||
- pap
|
||||
- pbt
|
||||
- pcd
|
||||
- pck
|
||||
- pcm
|
||||
- pfl
|
||||
- plt
|
||||
- pmq
|
||||
- pmx
|
||||
- pnb
|
||||
- pnt
|
||||
- pol
|
||||
- por
|
||||
- pov
|
||||
- ppk
|
||||
- pps
|
||||
- prg
|
||||
- pui
|
||||
- pxm
|
||||
- quc
|
||||
- qul
|
||||
- qup
|
||||
- qus
|
||||
- quz
|
||||
- raw
|
||||
- rcf
|
||||
- rel
|
||||
- rhg
|
||||
- ria
|
||||
- rjs
|
||||
- rmc
|
||||
- rml
|
||||
- rmn
|
||||
- rmy
|
||||
- rnl
|
||||
- roh
|
||||
- ron
|
||||
- rtm
|
||||
- rue
|
||||
- run
|
||||
- rus
|
||||
- sah
|
||||
- san
|
||||
- sat
|
||||
- sck
|
||||
- scn
|
||||
- sda
|
||||
- sdc
|
||||
- sdh
|
||||
- ses
|
||||
- sgc
|
||||
- sgh
|
||||
- sid
|
||||
- sin
|
||||
- sju
|
||||
- skr
|
||||
- slk
|
||||
- slv
|
||||
- sma
|
||||
- sme
|
||||
- smj
|
||||
- smn
|
||||
- smo
|
||||
- sms
|
||||
- smt
|
||||
- sna
|
||||
- snd
|
||||
- som
|
||||
- sot
|
||||
- spa
|
||||
- srd
|
||||
- srp
|
||||
- ssw
|
||||
- sul
|
||||
- sun
|
||||
- swe
|
||||
- swg
|
||||
- swh
|
||||
- syc
|
||||
- syl
|
||||
- szl
|
||||
- tab
|
||||
- tam
|
||||
- taq
|
||||
- tat
|
||||
- tcy
|
||||
- tcz
|
||||
- tel
|
||||
- tet
|
||||
- tgk
|
||||
- tha
|
||||
- thl
|
||||
- tig
|
||||
- tir
|
||||
- tkl
|
||||
- tkr
|
||||
- tlh
|
||||
- tly
|
||||
- tok
|
||||
- ton
|
||||
- tpi
|
||||
- tpw
|
||||
- trc
|
||||
- trp
|
||||
- trs
|
||||
- ttj
|
||||
- tuk
|
||||
- tur
|
||||
- tuv
|
||||
- twx
|
||||
- tyv
|
||||
- tzl
|
||||
- tzm
|
||||
- udm
|
||||
- uig
|
||||
- ukr
|
||||
- urd
|
||||
- uzn
|
||||
- uzs
|
||||
- vap
|
||||
- vie
|
||||
- vot
|
||||
- vro
|
||||
- war
|
||||
- way
|
||||
- wba
|
||||
- wbm
|
||||
- wes
|
||||
- whk
|
||||
- wlx
|
||||
- wol
|
||||
- wsg
|
||||
- wwa
|
||||
- xal
|
||||
- xho
|
||||
- xmm
|
||||
- xmv
|
||||
- xog
|
||||
- yaz
|
||||
- ydd
|
||||
- yor
|
||||
- yrk
|
||||
- yrl
|
||||
- yua
|
||||
- yue
|
||||
- zea
|
||||
- zgh
|
||||
- zom
|
||||
- zsm
|
||||
- zul
|
||||
pipeline_tag: translation
|
||||
datasets:
|
||||
- MihaiPopa-1/OmniSurgical-1.1
|
||||
---
|
||||
|
||||
# OmniTranslate 1.1
|
||||
|
||||
OmniTranslate 1.1 is a massively multilingual machine translation model supporting over 500 languages. Fine-tuned from [Qwen 3 0.6B](https://www.huggingface.co/Qwen/Qwen3-0.6B) (with Unsloth), this model is designed for translation tasks on any device!
|
||||
|
||||
# Features
|
||||
* **500+ Languages Supported:** The broadest coverage of languages supported for a translation model that's under 1 billion parameters!
|
||||
* **Tiny Size:** Beats any other large model on speed and memory usage. No other model is able to compete with this!
|
||||
|
||||
# Improvements over 1.0
|
||||
* OmniTranslate now makes fewer hiccups when translating to Romanian (like "ami"), and the diacritic bug on Romanian translations has been mostly fixed!
|
||||
|
||||
There's still a small chance that the model will produce Romanian output without diacritics (this mostly depends on the random seed), so if that happens, try a different seed.
|
||||
|
||||
# Experimental Features
|
||||
* We added 2 new languages: Emoji and Sulfuristic Speak (my own constructed language, created for OmniTranslate 1.1 to fit the Chaos Cubed Minecraft vibe!). Try these out:
|
||||
|
||||
## Emoji
|
||||
```python
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
import torch
|
||||
|
||||
# 1. Load from your Hugging Face Repo
|
||||
model_id = "MihaiPopa-1/OmniTranslate-1.1"
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_id,
|
||||
torch_dtype=torch.float32, # Standard for CPU
|
||||
device_map="cpu" # Forces CPU usage
|
||||
)
|
||||
|
||||
# 2. Translate to Emoji
|
||||
prompt = "<|im_start|>user\nTranslate to emj_Emoj: We love the world!<|im_end|>\n<|im_start|>assistant\n<think>\n"
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
|
||||
|
||||
with torch.no_grad():
|
||||
outputs = model.generate(**inputs, max_new_tokens=64, temperature=0.1)
|
||||
|
||||
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
||||
```
|
||||
|
||||
## Sulfuristic Speak
|
||||
```python
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
import torch
|
||||
|
||||
# 1. Load from your Hugging Face Repo
|
||||
model_id = "MihaiPopa-1/OmniTranslate-1.1"
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_id,
|
||||
torch_dtype=torch.float32, # Standard for CPU
|
||||
device_map="cpu" # Forces CPU usage
|
||||
)
|
||||
|
||||
# 2. Translate to Sulfuristic Speak ("Translate to Sulfuristic Speak" also works!)
|
||||
prompt = "<|im_start|>user\nTranslate to sul_Latn: Let's ride a Sulfur Cube!<|im_end|>\n<|im_start|>assistant\n<think>\n"
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
|
||||
|
||||
with torch.no_grad():
|
||||
outputs = model.generate(**inputs, max_new_tokens=128, temperature=0.1)
|
||||
|
||||
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
||||
```
|
||||
|
||||
# Notes
|
||||
OmniTranslate 1.1 is still an experimental model and shouldn't be used for tasks where accurate translations matter.
|
||||
|
||||
# Tips
|
||||
Providing the language code (for example, `ron_Latn` for Romanian) instead of the language name can improve the results a lot.
|
||||
|
||||
# Usage
|
||||
The code below was generated by Gemini 3 Flash, with a few small modifications of my own:
|
||||
```python
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
import torch
|
||||
|
||||
# 1. Load from your Hugging Face Repo
|
||||
model_id = "MihaiPopa-1/OmniTranslate-1.1"
|
||||
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
model_id,
|
||||
torch_dtype=torch.float32, # Standard for CPU
|
||||
device_map="cpu" # Forces CPU usage
|
||||
)
|
||||
|
||||
# 2. Translate (replace ron_Latn with your language here)
|
||||
prompt = "<|im_start|>user\nTranslate to ron_Latn: OmniTranslate is a massively multilingual machine translation model supporting over 500 languages!<|im_end|>\n<|im_start|>assistant\n<think>\n"
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
|
||||
|
||||
with torch.no_grad():
|
||||
outputs = model.generate(**inputs, max_new_tokens=256, temperature=0.1)
|
||||
|
||||
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
||||
```
|
||||
|
||||
# Data Used
|
||||
I used my own [OmniSurgical 1.1](https://www.huggingface.co/datasets/MihaiPopa-1/OmniSurgical-1.1) dataset, which itself contains a part of [HF's FineTranslations](https://www.huggingface.co/datasets/HuggingFaceFW/finetranslations).
|
||||
|
||||
---
|
||||
|
||||
# Uploaded finetuned model
|
||||
|
||||
- **Developed by:** MihaiPopa-1
|
||||
- **License:** apache-2.0
|
||||
- **Finetuned from model:** unsloth/qwen3-0.6b-unsloth-bnb-4bit
|
||||
|
||||
This qwen3 model was trained 2x faster with [Unsloth](https://github.com/unslothai/unsloth) and Hugging Face's TRL library.
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/unslothai/unsloth/main/images/unsloth%20made%20with%20love.png" width="200"/>](https://github.com/unslothai/unsloth)
|
||||
99
chat_template.jinja
Normal file
99
chat_template.jinja
Normal file
@@ -0,0 +1,99 @@
|
||||
{%- if tools %}
|
||||
{{- '<|im_start|>system\n' }}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- messages[0].content + '\n\n' }}
|
||||
{%- endif %}
|
||||
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
||||
{%- for tool in tools %}
|
||||
{{- "\n" }}
|
||||
{{- tool | tojson }}
|
||||
{%- endfor %}
|
||||
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
||||
{%- else %}
|
||||
{%- if messages[0].role == 'system' %}
|
||||
{{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
|
||||
{%- for forward_message in messages %}
|
||||
{%- set index = (messages|length - 1) - loop.index0 %}
|
||||
{%- set message = messages[index] %}
|
||||
{%- set current_content = message.content if message.content is defined and message.content is not none else '' %}
|
||||
{%- set tool_start = '<tool_response>' %}
|
||||
{%- set tool_start_length = tool_start|length %}
|
||||
{%- set start_of_message = current_content[:tool_start_length] %}
|
||||
{%- set tool_end = '</tool_response>' %}
|
||||
{%- set tool_end_length = tool_end|length %}
|
||||
{%- set start_pos = (current_content|length) - tool_end_length %}
|
||||
{%- if start_pos < 0 %}
|
||||
{%- set start_pos = 0 %}
|
||||
{%- endif %}
|
||||
{%- set end_of_message = current_content[start_pos:] %}
|
||||
{%- if ns.multi_step_tool and message.role == "user" and not(start_of_message == tool_start and end_of_message == tool_end) %}
|
||||
{%- set ns.multi_step_tool = false %}
|
||||
{%- set ns.last_query_index = index %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- for message in messages %}
|
||||
{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
||||
{%- elif message.role == "assistant" %}
|
||||
{%- set m_content = message.content if message.content is defined and message.content is not none else '' %}
|
||||
{%- set content = m_content %}
|
||||
{%- set reasoning_content = '' %}
|
||||
{%- if message.reasoning_content is defined and message.reasoning_content is not none %}
|
||||
{%- set reasoning_content = message.reasoning_content %}
|
||||
{%- else %}
|
||||
{%- if '</think>' in m_content %}
|
||||
{%- set content = (m_content.split('</think>')|last).lstrip('\n') %}
|
||||
{%- set reasoning_content = (m_content.split('</think>')|first).rstrip('\n') %}
|
||||
{%- set reasoning_content = (reasoning_content.split('<think>')|last).lstrip('\n') %}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- if loop.index0 > ns.last_query_index %}
|
||||
{%- if loop.last or (not loop.last and (not reasoning_content.strip() == '')) %}
|
||||
{{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- else %}
|
||||
{{- '<|im_start|>' + message.role + '\n' + content }}
|
||||
{%- endif %}
|
||||
{%- if message.tool_calls %}
|
||||
{%- for tool_call in message.tool_calls %}
|
||||
{%- if (loop.first and content) or (not loop.first) %}
|
||||
{{- '\n' }}
|
||||
{%- endif %}
|
||||
{%- if tool_call.function %}
|
||||
{%- set tool_call = tool_call.function %}
|
||||
{%- endif %}
|
||||
{{- '<tool_call>\n{"name": "' }}
|
||||
{{- tool_call.name }}
|
||||
{{- '", "arguments": ' }}
|
||||
{%- if tool_call.arguments is string %}
|
||||
{{- tool_call.arguments }}
|
||||
{%- else %}
|
||||
{{- tool_call.arguments | tojson }}
|
||||
{%- endif %}
|
||||
{{- '}\n</tool_call>' }}
|
||||
{%- endfor %}
|
||||
{%- endif %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- elif message.role == "tool" %}
|
||||
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
||||
{{- '<|im_start|>user' }}
|
||||
{%- endif %}
|
||||
{{- '\n<tool_response>\n' }}
|
||||
{{- message.content }}
|
||||
{{- '\n</tool_response>' }}
|
||||
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
||||
{{- '<|im_end|>\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
{%- endfor %}
|
||||
{%- if add_generation_prompt %}
|
||||
{{- '<|im_start|>assistant\n' }}
|
||||
{%- if enable_thinking is defined and enable_thinking is false %}
|
||||
{{- '<think>\n\n</think>\n\n' }}
|
||||
{%- endif %}
|
||||
{%- endif %}
|
||||
64
config.json
Normal file
64
config.json
Normal file
@@ -0,0 +1,64 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": null,
|
||||
"torch_dtype": "float16",
|
||||
"eos_token_id": 151645,
|
||||
"head_dim": 128,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 1024,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 3072,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 40960,
|
||||
"max_window_layers": 28,
|
||||
"model_type": "qwen3",
|
||||
"num_attention_heads": 16,
|
||||
"num_hidden_layers": 28,
|
||||
"num_key_value_heads": 8,
|
||||
"pad_token_id": 151669,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_parameters": {
|
||||
"rope_theta": 1000000,
|
||||
"rope_type": "default"
|
||||
},
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": true,
|
||||
"unsloth_fixed": true,
|
||||
"unsloth_version": "2026.4.4",
|
||||
"use_cache": false,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ddbd4e1702ef2edb82380ae4453f305c47516c88bb04d40ecb31cebea6830680
|
||||
size 1192135096
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d7430e9138b76e93fb6f93462394d236b411111aef53cb421ba97d2691040cca
|
||||
size 11423114
|
||||
16
tokenizer_config.json
Normal file
16
tokenizer_config.json
Normal file
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user