From 9c67cb3cb4f8076d0d564f124cac68e002fc9f07 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Mon, 18 May 2026 09:26:13 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: turkerberkdonmez/TUSGPT-TR-Medical-9B Source: Original Platform --- .gitattributes | 38 +++ README.md | 225 +++++++++++++++ TUSGPT-TR-Medical-9B-Q4_K_M.gguf | 3 + TUSGPT-TR-Medical-9B-Q8_0.gguf | 3 + chat_template.jinja | 4 + config.json | 33 +++ generation_config.json | 11 + model-00001-of-00007.safetensors | 3 + model-00002-of-00007.safetensors | 3 + model-00003-of-00007.safetensors | 3 + model-00004-of-00007.safetensors | 3 + model-00005-of-00007.safetensors | 3 + model-00006-of-00007.safetensors | 3 + model-00007-of-00007.safetensors | 3 + model.safetensors.index.json | 472 +++++++++++++++++++++++++++++++ tokenizer.json | 3 + tokenizer_config.json | 17 ++ 17 files changed, 830 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 TUSGPT-TR-Medical-9B-Q4_K_M.gguf create mode 100644 TUSGPT-TR-Medical-9B-Q8_0.gguf create mode 100644 chat_template.jinja create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model-00001-of-00007.safetensors create mode 100644 model-00002-of-00007.safetensors create mode 100644 model-00003-of-00007.safetensors create mode 100644 model-00004-of-00007.safetensors create mode 100644 model-00005-of-00007.safetensors create mode 100644 model-00006-of-00007.safetensors create mode 100644 model-00007-of-00007.safetensors create mode 100644 model.safetensors.index.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..e4b3664 --- /dev/null +++ 
b/.gitattributes @@ -0,0 +1,38 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +TUSGPT-TR-Medical-9B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text +TUSGPT-TR-Medical-9B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..4d323d9 --- /dev/null +++ b/README.md @@ -0,0 +1,225 @@ +--- +language: + - tr +license: apache-2.0 +library_name: transformers +pipeline_tag: 
text-generation +base_model: ytu-ce-cosmos/Turkish-Gemma-9b-T1 +datasets: + - turkerberkdonmez/TUSGPT-TR-Medical-Dataset-v1 +tags: + - medical + - turkish + - gemma2 + - dora + - sft + - mlx + - apple-silicon +model-index: + - name: TUSGPT-TR-Medical-9B + results: [] +--- + +<div align="center">
+ +# ⚕️ TUSGPT-TR-Medical-9B + +**Türkiye'nin İlk Açık Kaynak Türkçe Medikal Dil Modeli** *Turkey's First Open-Source Turkish Medical Language Model* + +[![License](https://img.shields.io/badge/License-Apache_2.0-green.svg)](https://opensource.org/licenses/Apache-2.0) +[![Base Model](https://img.shields.io/badge/Base_Model-Gemma_2_9B-blue)](https://huggingface.co/ytu-ce-cosmos/Turkish-Gemma-9b-T1) +[![Language](https://img.shields.io/badge/Language-Turkish-red)](https://huggingface.co/languages/tr) +[![Framework](https://img.shields.io/badge/Training-MLX-purple)](https://github.com/ml-explore/mlx) + +![tusgpt_readme_banner_horizontal](https://cdn-uploads.huggingface.co/production/uploads/644f952123d7eb05ca699d31/cAwybl8lXJsIiSYnkj5IG.png) + +[🇹🇷 Türkçe Açıklama](#-model-hakkında) | [🇬🇧 English Description](#-model-description) | [💻 Kullanım/Usage](#-kullanım--usage) + +</div>
+ +--- + +## 🇹🇷 Model Hakkında + +**TUSGPT-TR-Medical-9B**, Türkiye'nin medikal alandaki yapay zeka gelişimine katkı sağlamak amacıyla geliştirilmiş, **Gemma-2** mimarisine dayalı 9 milyar parametreli bir dil modelidir. + +Model, **[ytu-ce-cosmos/Turkish-Gemma-9b-T1](https://huggingface.co/ytu-ce-cosmos/Turkish-Gemma-9b-T1)** temel modeli üzerine, **55.000'den fazla yüksek kaliteli Türkçe tıbbi soru-cevap çifti** ile **2 aşamalı DoRA (Weight-Decomposed Low-Rank Adaptation)** yöntemi kullanılarak fine-tune edilmiştir. + +### 📚 Veri Seti Kapsamı (Dataset Coverage) + +Model, aşağıdaki branşları ve daha fazlasını kapsayan **55,465 Türkçe soru-cevap** çifti ile eğitilmiştir: + +* **Temel Bilimler:** Farmakoloji, Patoloji, Anatomi, Fizyoloji +* **Klinik Bilimler:** Dahiliye, Cerrahi, Pediatri, Kadın Doğum +* **Diğer:** Acil Tıp, Nöroloji, Onkoloji, Radyoloji + +--- + +## 🇬🇧 Model Description + +**TUSGPT-TR-Medical-9B** is a specialized 9-billion parameter language model based on the **Gemma-2** architecture, designed to advance medical AI research in Turkey. + +It is fine-tuned on the **[ytu-ce-cosmos/Turkish-Gemma-9b-T1](https://huggingface.co/ytu-ce-cosmos/Turkish-Gemma-9b-T1)** base model using **55,000+ high-quality Turkish medical Q&A pairs**, with a **2-Stage DoRA** methodology trained on Apple Silicon hardware. 
+ +### 📚 Dataset Scope + +The model covers a wide range of medical disciplines with **55,465 Q&A pairs**, including: + +* **Basic Sciences:** Pharmacology, Pathology, Anatomy, Physiology +* **Clinical Sciences:** Internal Medicine, Surgery, Pediatrics, Obstetrics & Gynecology +* **Others:** Emergency Medicine, Neurology, Oncology, Radiology + +--- + +## 📊 Teknik Detaylar / Technical Details + +| Özellik / Feature | Detay / Detail | +|---|---| +| **Base Model** | [ytu-ce-cosmos/Turkish-Gemma-9b-T1](https://huggingface.co/ytu-ce-cosmos/Turkish-Gemma-9b-T1) | +| **Architecture** | Gemma 2 (9.24B Parameters) | +| **Dataset** | [turkerberkdonmez/TUSGPT-TR-Medical-Dataset-v1](https://huggingface.co/datasets/turkerberkdonmez/TUSGPT-TR-Medical-Dataset-v1) | +| **Dataset Size** | 55,465 samples (Q&A) | +| **Training Method** | 2-Stage DoRA (Weight-Decomposed LoRA) | +| **Precision** | bfloat16 | +| **Hardware** | Apple Mac Studio (M-Series, 128GB Unified Memory) | + +<details>
+<summary>🔬 Eğitim Parametrelerini Görüntüle / View Training Hyperparameters</summary> + +### Stage 1 — Aggressive Knowledge Injection +* **DoRA:** rank=64, alpha=128, target_modules=all linear layers +* **Optimizer:** NEFTune (alpha=3) enabled +* **Learning Rate:** 2e-5 → 2e-6 (cosine decay) +* **Steps:** 1600 iterations (~1 epoch) + +### Stage 2 — Stabilization +* **Config:** Resumed from Stage 1 best checkpoint +* **Optimizer:** NEFTune disabled +* **Learning Rate:** 5e-6 → 1e-7 +* **Steps:** 1000 iterations +* **Final Val Loss:** 1.126 +</details>
+ +--- + +## 💻 Kullanım / Usage + +### 🐍 Python (Transformers) + +> **Generation önerisi:** Temperature=0.6, TopP=0.95, TopK=20, MinP=0 (generation_config.json varsayılanı). +> **Greedy decoding kullanmayın**; performans düşüşüne ve sonsuz tekrarlara yol açabilir. +> **Complex tasks:** `max_new_tokens` değerini artırın. Gerekirse `repetition_penalty` ve `presence_penalty` (0–2) ayarlanabilir. +> Not: Daha yüksek değerler bazen dil karışmasına ve hafif performans düşüşüne neden olabilir. + +```python +from transformers import AutoModelForCausalLM, AutoTokenizer +import torch + +model_id = "turkerberkdonmez/TUSGPT-TR-Medical-9B" + +tokenizer = AutoTokenizer.from_pretrained(model_id) +model = AutoModelForCausalLM.from_pretrained( + model_id, + torch_dtype=torch.bfloat16, + device_map="auto", +) + +messages = [ + {"role": "system", "content": "Sen tıp alanında uzmanlaşmış, Türkçe yanıt veren bir yapay zeka asistanısın. Soruları doğru, kapsamlı ve anlaşılır biçimde yanıtla."}, + {"role": "user", "content": "Akut miyokard enfarktüsünün erken belirtileri nelerdir?"}, +] + +prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False) +inputs = tokenizer(prompt, return_tensors="pt", add_special_tokens=False).to(model.device) # chat template already emits <bos>; avoid a duplicate + +outputs = model.generate( + **inputs, + max_new_tokens=512, # complex tasks için artırılabilir + temperature=0.6, + top_p=0.95, + top_k=20, + # min_p Transformers'ta her zaman desteklenmeyebilir; model config'ine bağlıdır. + # Aşağıdakiler opsiyonel: tekrarları azaltmak için + # repetition_penalty=1.15, + # presence_penalty=0.3, + do_sample=True, # greedy decoding kapalı +) + +print(tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)) +``` + +### 🦙 GGUF (Ollama & LM Studio) + +Bu modelin sıkıştırılmış (quantized) versiyonları yerel cihazlarda çalıştırılabilir. 
+ +| Dosya Adı (Filename) | Quant | Boyut (Size) | Önerilen Donanım (Recommended HW) | +| ---------------------------------- | ------ | ------------ | --------------------------------- | +| `TUSGPT-TR-Medical-9B-Q8_0.gguf` | Q8_0 | ~9.8 GB | 12GB+ VRAM / 16GB+ RAM | +| `TUSGPT-TR-Medical-9B-Q4_K_M.gguf` | Q4_K_M | ~5.8 GB | 8GB+ VRAM / 12GB+ RAM | + +#### Ollama Setup + +> **Önerilen üretim ayarları:** Temperature=0.6, TopP=0.95, TopK=20, MinP=0 +> **Greedy decoding kullanmayın**; performans düşüşü ve sonsuz tekrar riski yaratabilir. +> **Complex tasks:** `num_predict` (max_new_tokens) artırılabilir. +> Tekrarlar olursa `repeat_penalty` ve `presence_penalty` (0–2) ayarlanabilir (yüksek değerler bazen dil karışmasına ve hafif performans düşüşüne yol açabilir). + +1. **Modelfile Oluşturun / Create Modelfile:** + +```dockerfile +FROM ./TUSGPT-TR-Medical-9B-Q4_K_M.gguf + +SYSTEM "Sen tıp alanında uzmanlaşmış, Türkçe yanıt veren bir yapay zeka asistanısın. Soruları doğru, kapsamlı ve anlaşılır biçimde yanıtla." + +# Recommended generation (DO NOT use greedy decoding) +PARAMETER temperature 0.6 +PARAMETER top_p 0.95 +PARAMETER top_k 20 +PARAMETER min_p 0 + +# Complex tasks: increase num_predict +# PARAMETER num_predict 1024 + +# To reduce endless repetitions (optional; tune gradually) +# PARAMETER repeat_penalty 1.15 +# PARAMETER presence_penalty 0.3 +``` + +2. **Modeli Çalıştırın / Run Model:** + +```bash +ollama create tusgpt-medical -f Modelfile +ollama run tusgpt-medical +``` + +--- + +## ⚠️ Yasal Uyarı / Disclaimer + +#### 🇹🇷 Türkçe + +> **Bu model eğitim ve araştırma amaçlıdır.** Klinik karar verme süreçlerinde tek başına kullanılmamalıdır. Tıbbi kararlar için her zaman uzman hekime danışın. + +#### 🇬🇧 English + +> **This model is for educational and research purposes only.** It should not be used as a sole source for clinical decision-making. Always consult a qualified physician for medical decisions. 
+ +--- + +## 🤝 Acknowledgments + +* **Base Model:** [YTU CE COSMOS Lab](https://huggingface.co/ytu-ce-cosmos) — Turkish-Gemma-9b-T1 +* **Training Framework:** [MLX](https://github.com/ml-explore/mlx) by Apple + +--- + +## 📝 Citation + +```bibtex +@misc{tusgpt-tr-medical-9b, + title = {TUSGPT-TR-Medical-9B: Turkish Medical Language Model}, + author = {Türker Berk Dönmez}, + year = {2026}, + url = {https://huggingface.co/turkerberkdonmez/TUSGPT-TR-Medical-9B} +} +``` \ No newline at end of file diff --git a/TUSGPT-TR-Medical-9B-Q4_K_M.gguf b/TUSGPT-TR-Medical-9B-Q4_K_M.gguf new file mode 100644 index 0000000..99756b0 --- /dev/null +++ b/TUSGPT-TR-Medical-9B-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:842307b7ed337969fca4c43c10b2e141a7e07a85b9073f10d2952dc95d931300 +size 5761059552 diff --git a/TUSGPT-TR-Medical-9B-Q8_0.gguf b/TUSGPT-TR-Medical-9B-Q8_0.gguf new file mode 100644 index 0000000..61792a9 --- /dev/null +++ b/TUSGPT-TR-Medical-9B-Q8_0.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f028c4327abffca1c68e3f6aec0250039bc0c53cc2d151e8089bf48fbacae84 +size 9827150240 diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..6f24167 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,4 @@ +{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + ' +' + message['content'] | trim + '<end_of_turn> +' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model +'}}{% endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..4a5846b --- /dev/null +++ b/config.json @@ -0,0 +1,33 @@ +{ + "architectures": [ + "Gemma2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": 50.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + 
"final_logit_softcapping": 30.0, + "head_dim": 256, + "hidden_act": "gelu_pytorch_tanh", + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 3584, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 8192, + "model_type": "gemma2", + "num_attention_heads": 16, + "num_hidden_layers": 42, + "num_key_value_heads": 8, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_theta": 10000.0, + "sliding_window": 4096, + "sliding_window_size": 4096, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.3", + "use_cache": false, + "vocab_size": 256000 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..75d0fa2 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,11 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.51.3", + "temperature": 0.6, + "top_k": 20, + "top_p": 0.95, +} diff --git a/model-00001-of-00007.safetensors b/model-00001-of-00007.safetensors new file mode 100644 index 0000000..3f3354b --- /dev/null +++ b/model-00001-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:11a7508220dfc1de1769431504c9532db34c50af7f17023e39077fdb6282fb65 +size 5182182962 diff --git a/model-00002-of-00007.safetensors b/model-00002-of-00007.safetensors new file mode 100644 index 0000000..c1d7a77 --- /dev/null +++ b/model-00002-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a22df56ccc5cee2c7601bbc8cc120cf1ffca0655f96add7be9e23c5a32ad2ee +size 5167562472 diff --git a/model-00003-of-00007.safetensors b/model-00003-of-00007.safetensors new file mode 100644 index 0000000..cc80826 --- /dev/null +++ b/model-00003-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:f9f5bff20c6da578b57bffc01119ea1b0e03d5cec7e521fab780f50419c48346 +size 5343752914 diff --git a/model-00004-of-00007.safetensors b/model-00004-of-00007.safetensors new file mode 100644 index 0000000..f5db7ce --- /dev/null +++ b/model-00004-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4afa5d691fa11100d792a88403ac296fe518aea63fbeea362403d6d86862007d +size 5343752902 diff --git a/model-00005-of-00007.safetensors b/model-00005-of-00007.safetensors new file mode 100644 index 0000000..765acf0 --- /dev/null +++ b/model-00005-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c7cb172cace81e3a200a7c2a909ddc6a78f950f97e5247c20d124c8963f7d98 +size 5167562528 diff --git a/model-00006-of-00007.safetensors b/model-00006-of-00007.safetensors new file mode 100644 index 0000000..ec81adf --- /dev/null +++ b/model-00006-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2452e775db53920e508ebb33bf9a3cde6414f24402857aea04e63abe2a7550c +size 5343752912 diff --git a/model-00007-of-00007.safetensors b/model-00007-of-00007.safetensors new file mode 100644 index 0000000..8037936 --- /dev/null +++ b/model-00007-of-00007.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3f6f268dc711b157b2e846e1af20a1797b6a5d6a563e8e59f7f7323f799be4a +size 3582092055 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000..7df4cca --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,472 @@ +{ + "metadata": { + "total_size": 35130604544, + "total_parameters": 9241705984 + }, + "weight_map": { + "model.embed_tokens.weight": "model-00001-of-00007.safetensors", + "model.layers.0.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.mlp.gate_proj.weight": 
"model-00001-of-00007.safetensors", + "model.layers.0.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.post_feedforward_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.pre_feedforward_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.post_feedforward_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.pre_feedforward_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.10.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.10.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + 
"model.layers.10.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.11.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.12.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + 
"model.layers.12.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.14.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.14.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + 
"model.layers.14.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.15.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.15.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.15.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.16.input_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.16.mlp.down_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.16.mlp.up_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.16.post_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.16.pre_feedforward_layernorm.weight": "model-00003-of-00007.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + 
"model.layers.17.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.17.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00007.safetensors", + "model.layers.18.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + 
"model.layers.19.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.2.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.post_feedforward_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.pre_feedforward_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.20.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.20.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + 
"model.layers.20.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.20.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.20.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.21.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.21.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.21.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.22.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.22.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.22.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.22.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.22.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + 
"model.layers.22.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.23.input_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.23.mlp.down_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.23.mlp.gate_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.23.mlp.up_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.23.post_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.23.pre_feedforward_layernorm.weight": "model-00004-of-00007.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.24.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.post_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.pre_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00004-of-00007.safetensors", + 
"model.layers.24.self_attn.o_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00004-of-00007.safetensors", + "model.layers.25.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.25.post_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.25.pre_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.26.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.26.post_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.26.pre_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + 
"model.layers.26.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.27.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.27.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.27.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.27.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.27.post_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.27.pre_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.28.input_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.28.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.28.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.28.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.28.post_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.28.pre_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.29.input_layernorm.weight": "model-00005-of-00007.safetensors", + 
"model.layers.29.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.29.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.29.mlp.up_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.29.post_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.29.pre_feedforward_layernorm.weight": "model-00005-of-00007.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.3.input_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.3.mlp.down_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.mlp.up_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.3.post_feedforward_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.3.pre_feedforward_layernorm.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.30.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.mlp.down_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.30.mlp.gate_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.30.mlp.up_proj.weight": 
"model-00006-of-00007.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.post_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00005-of-00007.safetensors", + "model.layers.31.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.31.post_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.31.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.32.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.32.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.32.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.32.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.32.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.32.post_feedforward_layernorm.weight": 
"model-00006-of-00007.safetensors", + "model.layers.32.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.32.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.32.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.32.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.32.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.33.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.33.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.33.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.33.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.33.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.33.post_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.33.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.33.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.33.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.33.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.33.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.34.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.34.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.34.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.34.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.34.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.34.post_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.34.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.34.self_attn.k_proj.weight": 
"model-00006-of-00007.safetensors", + "model.layers.34.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.34.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.34.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.35.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.35.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.35.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.35.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.35.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.35.post_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.35.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.35.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.35.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.35.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.35.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.36.input_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.36.mlp.down_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.36.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.36.mlp.up_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.36.post_attention_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.36.post_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.36.pre_feedforward_layernorm.weight": "model-00006-of-00007.safetensors", + "model.layers.36.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.36.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.36.self_attn.q_proj.weight": 
"model-00006-of-00007.safetensors", + "model.layers.36.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.37.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.37.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.37.mlp.gate_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.37.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.37.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.37.post_feedforward_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.37.pre_feedforward_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.37.self_attn.k_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.37.self_attn.o_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.37.self_attn.q_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.37.self_attn.v_proj.weight": "model-00006-of-00007.safetensors", + "model.layers.38.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.38.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.38.mlp.gate_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.38.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.38.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.38.post_feedforward_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.38.pre_feedforward_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.38.self_attn.k_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.38.self_attn.o_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.38.self_attn.q_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.38.self_attn.v_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.39.input_layernorm.weight": 
"model-00007-of-00007.safetensors", + "model.layers.39.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.39.mlp.gate_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.39.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.39.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.39.post_feedforward_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.39.pre_feedforward_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.39.self_attn.k_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.39.self_attn.o_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.39.self_attn.q_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.39.self_attn.v_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.4.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.post_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.pre_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00007.safetensors", + "model.layers.40.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.40.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.40.mlp.gate_proj.weight": "model-00007-of-00007.safetensors", + 
"model.layers.40.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.40.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.40.post_feedforward_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.40.pre_feedforward_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.40.self_attn.k_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.40.self_attn.o_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.40.self_attn.q_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.40.self_attn.v_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.41.input_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.41.mlp.down_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.41.mlp.gate_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.41.mlp.up_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.41.post_attention_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.41.post_feedforward_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.41.pre_feedforward_layernorm.weight": "model-00007-of-00007.safetensors", + "model.layers.41.self_attn.k_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.41.self_attn.o_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.41.self_attn.q_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.41.self_attn.v_proj.weight": "model-00007-of-00007.safetensors", + "model.layers.5.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + 
"model.layers.5.post_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.pre_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.post_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.pre_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.post_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.7.pre_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + 
"model.layers.7.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.8.post_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.8.pre_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.input_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.9.mlp.down_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.mlp.up_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.9.post_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.9.pre_feedforward_layernorm.weight": "model-00002-of-00007.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00007.safetensors", + "model.layers.9.self_attn.q_proj.weight": 
"model-00002-of-00007.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00007.safetensors", + "model.norm.weight": "model-00007-of-00007.safetensors" + } +} \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..e547dda --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:884c4014dec52092cbf5664ca02b0a4504fbd4e680245cd63692caccad91b2d4 +size 34362742 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..eea0820 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,17 @@ +{ + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "is_local": true, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "right", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}