初始化项目,由ModelHub XC社区提供模型
Model: odats/rl_nmt_2026_04_03_17_04 Source: Original Platform
This commit is contained in:
36
.gitattributes
vendored
Normal file
36
.gitattributes
vendored
Normal file
@@ -0,0 +1,36 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
||||
67
README.md
Normal file
67
README.md
Normal file
@@ -0,0 +1,67 @@
|
||||
---
|
||||
base_model: google/gemma-3-1b-it
|
||||
library_name: transformers
|
||||
model_name: rl_nmt_2026_04_03_17_04
|
||||
tags:
|
||||
- generated_from_trainer
|
||||
- grpo
|
||||
- trl
|
||||
license: gemma
|
||||
---
|
||||
|
||||
# Model Card for rl_nmt_2026_04_03_17_04
|
||||
|
||||
This model is a fine-tuned version of [google/gemma-3-1b-it](https://huggingface.co/google/gemma-3-1b-it).
|
||||
It has been trained using [TRL](https://github.com/huggingface/trl).
|
||||
|
||||
## Quick start
|
||||
|
||||
```python
|
||||
from transformers import pipeline
|
||||
|
||||
question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
|
||||
generator = pipeline("text-generation", model="odats/rl_nmt_2026_04_03_17_04", device="cuda")
|
||||
output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
|
||||
print(output["generated_text"])
|
||||
```
|
||||
|
||||
## Training procedure
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/oleg-dats/rlnmt_all/runs/nl5udsp7)
|
||||
|
||||
|
||||
|
||||
This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
|
||||
|
||||
### Framework versions
|
||||
|
||||
- TRL: 1.0.0
|
||||
- Transformers: 4.57.6
|
||||
- PyTorch: 2.10.0
|
||||
- Datasets: 4.8.4
|
||||
- Tokenizers: 0.22.2
|
||||
|
||||
## Citations
|
||||
|
||||
Cite GRPO as:
|
||||
|
||||
```bibtex
|
||||
@article{shao2024deepseekmath,
|
||||
title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}},
|
||||
author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. Wu and Daya Guo},
|
||||
year = 2024,
|
||||
eprint = {arXiv:2402.03300},
|
||||
}
|
||||
```
|
||||
|
||||
Cite TRL as:
|
||||
|
||||
```bibtex
|
||||
@software{vonwerra2020trl,
|
||||
title = {{TRL: Transformers Reinforcement Learning}},
|
||||
author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
|
||||
license = {Apache-2.0},
|
||||
url = {https://github.com/huggingface/trl},
|
||||
year = {2020}
|
||||
}
|
||||
```
|
||||
3
added_tokens.json
Normal file
3
added_tokens.json
Normal file
@@ -0,0 +1,3 @@
|
||||
{
|
||||
"<image_soft_token>": 262144
|
||||
}
|
||||
47
chat_template.jinja
Normal file
47
chat_template.jinja
Normal file
@@ -0,0 +1,47 @@
|
||||
{{ bos_token }}
|
||||
{%- if messages[0]['role'] == 'system' -%}
|
||||
{%- if messages[0]['content'] is string -%}
|
||||
{%- set first_user_prefix = messages[0]['content'] + '
|
||||
|
||||
' -%}
|
||||
{%- else -%}
|
||||
{%- set first_user_prefix = messages[0]['content'][0]['text'] + '
|
||||
|
||||
' -%}
|
||||
{%- endif -%}
|
||||
{%- set loop_messages = messages[1:] -%}
|
||||
{%- else -%}
|
||||
{%- set first_user_prefix = "" -%}
|
||||
{%- set loop_messages = messages -%}
|
||||
{%- endif -%}
|
||||
{%- for message in loop_messages -%}
|
||||
{%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}
|
||||
{{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }}
|
||||
{%- endif -%}
|
||||
{%- if (message['role'] == 'assistant') -%}
|
||||
{%- set role = "model" -%}
|
||||
{%- else -%}
|
||||
{%- set role = message['role'] -%}
|
||||
{%- endif -%}
|
||||
{{ '<start_of_turn>' + role + '
|
||||
' + (first_user_prefix if loop.first else "") }}
|
||||
{%- if message['content'] is string -%}
|
||||
{{ message['content'] | trim }}
|
||||
{%- elif message['content'] is iterable -%}
|
||||
{%- for item in message['content'] -%}
|
||||
{%- if item['type'] == 'image' -%}
|
||||
{{ '<start_of_image>' }}
|
||||
{%- elif item['type'] == 'text' -%}
|
||||
{{ item['text'] | trim }}
|
||||
{%- endif -%}
|
||||
{%- endfor -%}
|
||||
{%- else -%}
|
||||
{{ raise_exception("Invalid content type") }}
|
||||
{%- endif -%}
|
||||
{{ '<end_of_turn>
|
||||
' }}
|
||||
{%- endfor -%}
|
||||
{%- if add_generation_prompt -%}
|
||||
{{'<start_of_turn>model
|
||||
'}}
|
||||
{%- endif -%}
|
||||
3
completions/completions_00010.parquet
Normal file
3
completions/completions_00010.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8e5257f90e54384e3d3f05aebf9d14efc13caf3d82c112807823c180354e53ae
|
||||
size 7673
|
||||
3
completions/completions_00020.parquet
Normal file
3
completions/completions_00020.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2880481bee8536fa6cbc329ae5f6ac1c36a3ee28b7840be16b3bc8807d8aaff1
|
||||
size 6976
|
||||
3
completions/completions_00030.parquet
Normal file
3
completions/completions_00030.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b1291b77c009fc529ed9affe19283a1153ea86cea74787e109ada5d52dbebdf2
|
||||
size 9106
|
||||
3
completions/completions_00040.parquet
Normal file
3
completions/completions_00040.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e271628c80b8f41bb32557d2fe4ce8971ac4cca00f167fd2fb2ce34dd4f9d809
|
||||
size 8540
|
||||
3
completions/completions_00050.parquet
Normal file
3
completions/completions_00050.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b41d983de03e9430a5154b0a6f220ec73fcf4fcb128c690723eab1d7693fc2da
|
||||
size 6868
|
||||
3
completions/completions_00060.parquet
Normal file
3
completions/completions_00060.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d879c6be0357bfd3126722856efee76557950ecad51d60cfee55403da462ba73
|
||||
size 5808
|
||||
3
completions/completions_00070.parquet
Normal file
3
completions/completions_00070.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e921e53916bf71b934aec56e06b004f868c92f984b6634debb722a66fb0caa84
|
||||
size 7235
|
||||
3
completions/completions_00080.parquet
Normal file
3
completions/completions_00080.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2508d0499400d0c99c6fc9ee1350ede6576898546ab25e275f002a5bcac256bc
|
||||
size 7478
|
||||
3
completions/completions_00090.parquet
Normal file
3
completions/completions_00090.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ff669f1b55c0148540102feae9be9c0a42264c6fc2a002329af25ff301769195
|
||||
size 7948
|
||||
3
completions/completions_00100.parquet
Normal file
3
completions/completions_00100.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a2f166bf0a02218b1017ff088450caba294a5c88445c7658ab6b393449b8eb0d
|
||||
size 8756
|
||||
3
completions/completions_00110.parquet
Normal file
3
completions/completions_00110.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:26365f9093247dd4c4f1c21d86bce50f5c5b74b0bc15f823c9a80e2d30d55073
|
||||
size 6127
|
||||
3
completions/completions_00120.parquet
Normal file
3
completions/completions_00120.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a1fa5e3684725047215e49e14694b6672232be018e6a1723cc70e836930e9be4
|
||||
size 7716
|
||||
3
completions/completions_00130.parquet
Normal file
3
completions/completions_00130.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:d8577b67d2f4a17c132b21d32a1117d9d09ecb60f9d36b42095fbbd4a28ee1b8
|
||||
size 6933
|
||||
3
completions/completions_00140.parquet
Normal file
3
completions/completions_00140.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:c73ca81bca413d9959a86d8b7b73d53a3b9c0210c97f56ebc3fe6e83a9ce0023
|
||||
size 7076
|
||||
3
completions/completions_00150.parquet
Normal file
3
completions/completions_00150.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:489877f65f30b712656223c0ba7db3e269affb704ff94a46afec06e1775c8690
|
||||
size 6082
|
||||
3
completions/completions_00160.parquet
Normal file
3
completions/completions_00160.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:0ec871ad528e7bee84da1baeb511dbd2ed120f1fa8635ce1c809bb3a1d9eac99
|
||||
size 5820
|
||||
3
completions/completions_00170.parquet
Normal file
3
completions/completions_00170.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:603e42a468db44c155eb8f7109b41c15daf9134d4a3f0ec294c5bf569c0e307a
|
||||
size 11570
|
||||
3
completions/completions_00180.parquet
Normal file
3
completions/completions_00180.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:bbe40229629f103ac38056611b94100e1a690c05b6a524359bd4b496a8a31f2d
|
||||
size 5716
|
||||
3
completions/completions_00190.parquet
Normal file
3
completions/completions_00190.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fe7bdd6a721441e2b09659a04561e22c5a4d7e2062b59f1e52826052aa6f82dd
|
||||
size 6464
|
||||
3
completions/completions_00200.parquet
Normal file
3
completions/completions_00200.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9c14807ee24ad4c2b354ab4b678d17ad1330300598f3731b552a1696831ea65d
|
||||
size 8089
|
||||
3
completions/completions_00210.parquet
Normal file
3
completions/completions_00210.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:98e40f8e99e9c4c1565aaed8a6b083bd022293d43286a341730209ec757e6c4b
|
||||
size 7284
|
||||
3
completions/completions_00220.parquet
Normal file
3
completions/completions_00220.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e3f408c4fe3e11ff0743aa7553bcb8f8d55bb68fb70baae02e6c72e9a143dd0d
|
||||
size 6652
|
||||
3
completions/completions_00230.parquet
Normal file
3
completions/completions_00230.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f91ae72629fcade80dad7f004554b595f43c383741d39d7c58500ba2eda278f5
|
||||
size 8091
|
||||
3
completions/completions_00240.parquet
Normal file
3
completions/completions_00240.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b9ec26b22228448d618c40035991c38c63a22b9d29743ab50ad2aa025f095a7c
|
||||
size 9476
|
||||
3
completions/completions_00250.parquet
Normal file
3
completions/completions_00250.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:beb64ee5ac9e9ea3d5cad1600b74979797cec33705428d4ce5e90e1f6425c887
|
||||
size 7102
|
||||
3
completions/completions_00260.parquet
Normal file
3
completions/completions_00260.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:313b04ce01e3a87d1ca06ca2760553c788404ab72f4b96206b1cd47905c5a9b5
|
||||
size 7605
|
||||
3
completions/completions_00270.parquet
Normal file
3
completions/completions_00270.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a9b3335b45086c79dfcba987fcf33e91a8a1be591decee8bfd73003aaa3ed126
|
||||
size 8157
|
||||
3
completions/completions_00280.parquet
Normal file
3
completions/completions_00280.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ccb488a778ab38a064f3e9210257f65c8ecdb92d74e565595cb28860a9442a68
|
||||
size 8864
|
||||
3
completions/completions_00290.parquet
Normal file
3
completions/completions_00290.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2f7c2b695d5eb910fa0d9f21178ace32f8696cddc35f1e33827d936435258081
|
||||
size 8412
|
||||
3
completions/completions_00300.parquet
Normal file
3
completions/completions_00300.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6ae00ceb1fbecc2d4dbce433dbbd8a8c83b45aec4aba183a60bcf01d6450797b
|
||||
size 7863
|
||||
3
completions/completions_00310.parquet
Normal file
3
completions/completions_00310.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:344ef11bf30374ab41c355f37fde10441c8dfd4f6ac39f70469134865831ccea
|
||||
size 6963
|
||||
3
completions/completions_00320.parquet
Normal file
3
completions/completions_00320.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:621635dd81819e1c9eb841cfffd5d90dcb2a05276b6f40cffe5b3988f8db4ee2
|
||||
size 6455
|
||||
3
completions/completions_00330.parquet
Normal file
3
completions/completions_00330.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:9c1606115eebe6f22091da0942be138c33e0641804030ff9d97ab8cc956426d2
|
||||
size 9958
|
||||
3
completions/completions_00340.parquet
Normal file
3
completions/completions_00340.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:29b407fc1a2f12fa307fed55693cb39fe46fda0fa8d147152c4b723c79d1d4a1
|
||||
size 5920
|
||||
3
completions/completions_00350.parquet
Normal file
3
completions/completions_00350.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:fbdff52341aa7ea2bf71e32a1de8b997d75bf925b39019a99117693e1920d22c
|
||||
size 6288
|
||||
3
completions/completions_00360.parquet
Normal file
3
completions/completions_00360.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:f1e3af6cb4bba4b1867a8e61ef22672af0889fad24e75db67bca50d9680c344d
|
||||
size 7354
|
||||
3
completions/completions_00370.parquet
Normal file
3
completions/completions_00370.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:ba3326d71fa0e3bb68cd60e434381d5fcd8d1d9afd05a0c9a4ca2f8e7622f3aa
|
||||
size 6262
|
||||
3
completions/completions_00380.parquet
Normal file
3
completions/completions_00380.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:143a42e3bf1295383ec3186cad1aaf93a954573d8a016f42ee960c5194151800
|
||||
size 9294
|
||||
3
completions/completions_00390.parquet
Normal file
3
completions/completions_00390.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:e9a713d8bb0df0e7a65874db8fc1a8b1e402a89fd5a7474583a826e9cbc26217
|
||||
size 6811
|
||||
3
completions/completions_00400.parquet
Normal file
3
completions/completions_00400.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:453b85b1d82aade5267e29237b8d500a221657bb53f5176ecebf9dabbb6a4d0b
|
||||
size 7145
|
||||
3
completions/completions_00410.parquet
Normal file
3
completions/completions_00410.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:b99ed9352c0aefcfc1fc9fcb3612551cba83673b26a0dde3666db3c6181b6b62
|
||||
size 9494
|
||||
3
completions/completions_00420.parquet
Normal file
3
completions/completions_00420.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:60264a2699f27c39032aa9d24c6491e7cd8ded9110f9a7b8423ee9ca20bdfd2f
|
||||
size 6636
|
||||
3
completions/completions_00430.parquet
Normal file
3
completions/completions_00430.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:6db32f9143fb439984bbb7e00ee0e3c61fb6e2bea31239fb86a55543ab34b9b7
|
||||
size 6825
|
||||
3
completions/completions_00440.parquet
Normal file
3
completions/completions_00440.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:8215a9a0480877de7051a671691ca91f75c8ce181770b1115e719a8b86379d9e
|
||||
size 8490
|
||||
3
completions/completions_00450.parquet
Normal file
3
completions/completions_00450.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:a9851cbd8fda6ae478c1f6371f39d649611188f761d2c2bb121d8061952bcc83
|
||||
size 9154
|
||||
3
completions/completions_00460.parquet
Normal file
3
completions/completions_00460.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4ec69889c859fee870b37b1cb0cc221302c7630b41a236436d631247e935d983
|
||||
size 6918
|
||||
3
completions/completions_00470.parquet
Normal file
3
completions/completions_00470.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:356cc883960411818b6e101daac7c4525711e6f8c47312fd22425c3165e4f016
|
||||
size 6718
|
||||
3
completions/completions_00480.parquet
Normal file
3
completions/completions_00480.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4fea78bcb1561f0444ad0bf6e3cba99e8be766d45259ba3a935b84c59c3aff47
|
||||
size 8520
|
||||
3
completions/completions_00490.parquet
Normal file
3
completions/completions_00490.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4b485721b5d8a483e5da71f94b65efdbd3ff33ce6e60de786d38c71a62f95774
|
||||
size 10246
|
||||
3
completions/completions_00500.parquet
Normal file
3
completions/completions_00500.parquet
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4b0b902cbaac711f27753bf14e091e06da94b8d66b1be835d25c3aeffc9b2512
|
||||
size 7755
|
||||
64
config.json
Normal file
64
config.json
Normal file
@@ -0,0 +1,64 @@
|
||||
{
|
||||
"_sliding_window_pattern": 6,
|
||||
"architectures": [
|
||||
"Gemma3ForCausalLM"
|
||||
],
|
||||
"attention_bias": false,
|
||||
"attention_dropout": 0.0,
|
||||
"attn_logit_softcapping": null,
|
||||
"bos_token_id": 2,
|
||||
"cache_implementation": "hybrid",
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 1,
|
||||
"final_logit_softcapping": null,
|
||||
"head_dim": 256,
|
||||
"hidden_activation": "gelu_pytorch_tanh",
|
||||
"hidden_size": 1152,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 6912,
|
||||
"layer_types": [
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention",
|
||||
"full_attention",
|
||||
"sliding_attention",
|
||||
"sliding_attention"
|
||||
],
|
||||
"max_position_embeddings": 32768,
|
||||
"model_type": "gemma3_text",
|
||||
"num_attention_heads": 4,
|
||||
"num_hidden_layers": 26,
|
||||
"num_key_value_heads": 1,
|
||||
"pad_token_id": 0,
|
||||
"query_pre_attn_scalar": 256,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_local_base_freq": 10000,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000,
|
||||
"sliding_window": 512,
|
||||
"sliding_window_pattern": 6,
|
||||
"transformers_version": "4.57.6",
|
||||
"use_bidirectional_attention": false,
|
||||
"use_cache": true,
|
||||
"vocab_size": 262144
|
||||
}
|
||||
14
generation_config.json
Normal file
14
generation_config.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"bos_token_id": 2,
|
||||
"cache_implementation": "hybrid",
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
1,
|
||||
1,
|
||||
106
|
||||
],
|
||||
"pad_token_id": 0,
|
||||
"top_k": 64,
|
||||
"top_p": 0.95,
|
||||
"transformers_version": "4.57.6"
|
||||
}
|
||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:18c66cd2be1cbbdaf6a97f42080ce6778dd91e1d981356ecf1615d42310fb6e3
|
||||
size 1999811208
|
||||
33
special_tokens_map.json
Normal file
33
special_tokens_map.json
Normal file
@@ -0,0 +1,33 @@
|
||||
{
|
||||
"boi_token": "<start_of_image>",
|
||||
"bos_token": {
|
||||
"content": "<bos>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"eoi_token": "<end_of_image>",
|
||||
"eos_token": {
|
||||
"content": "<eos>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"image_token": "<image_soft_token>",
|
||||
"pad_token": {
|
||||
"content": "<pad>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
},
|
||||
"unk_token": {
|
||||
"content": "<unk>",
|
||||
"lstrip": false,
|
||||
"normalized": false,
|
||||
"rstrip": false,
|
||||
"single_word": false
|
||||
}
|
||||
}
|
||||
3
tokenizer.json
Normal file
3
tokenizer.json
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:4667f2089529e8e7657cfb6d1c19910ae71ff5f28aa7ab2ff2763330affad795
|
||||
size 33384568
|
||||
3
tokenizer.model
Normal file
3
tokenizer.model
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:1299c11d7cf632ef3b4e11937501358ada021bbdf7c47638d13c0ee982f2e79c
|
||||
size 4689074
|
||||
51345
tokenizer_config.json
Normal file
51345
tokenizer_config.json
Normal file
File diff suppressed because it is too large
Load Diff
3
training_args.bin
Normal file
3
training_args.bin
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:2b268efaefb9b680f43b70a1c8c7aeb8fdc5c844b64426388add5fcd5e078c00
|
||||
size 8017
|
||||
Reference in New Issue
Block a user