From d45020e4f686a07e233de27966276bc27e4841a6 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Thu, 7 May 2026 18:55:28 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: EMD123/tiny-aya-kosher-3.3B Source: Original Platform --- .gitattributes | 36 ++++++++++++++++ README.md | 97 ++++++++++++++++++++++++++++++++++++++++++ chat_template.jinja | 20 +++++++++ config.json | 82 +++++++++++++++++++++++++++++++++++ generation_config.json | 7 +++ model.safetensors | 3 ++ tokenizer.json | 3 ++ tokenizer_config.json | 20 +++++++++ 8 files changed, 268 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 chat_template.jinja create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs 
diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..d9f0dcb --- /dev/null +++ b/README.md @@ -0,0 +1,97 @@ +--- +library_name: transformers +base_model: CohereLabs/tiny-aya-global +tags: +- hebrew +- safe-ai +- kosher-ai +- fine-tuned +- cohere +- aya +language: +- he +- en +license: cc-by-nc-4.0 +--- + +# tiny-aya-kosher-3.3B: מודל שפה מותאם לערכי הצניעות וההלכה + +## Model Details + +### Model Description + +tiny-aya-kosher-3.3B הוא מודל שפה ממוזג (Merged) המבוסס על Tiny-Aya-Global. המודל עבר כוונון עדין (Fine-tuning) ממוקד כדי להתאים את תגובותיו לערכי הציבור החרדי, תוך דגש על סינון תכנים שאינם הולמים, מניעת עיסוק בנושאי כפירה, עבודה זרה, ושמירה על גדרי הצניעות. + +- **Developed by:** EMD123 +- **Model type:** Causal Language Model (Fine-tuned with QLoRA) +- **Language(s) (NLP):** Hebrew (Primary), English +- **License:** CC-BY-NC-4.0 (Non-Commercial use only) +- **Finetuned from model:** CohereLabs/tiny-aya-global + +## Uses + +### Direct Use + +המודל נועד לשמש כעוזר בינה מלאכותית "כשר". 
הוא מתאים לשימוש במערכות המיועדות למשתמשים שומרי תורה ומצוות המעוניינים בכלי עבודה חכם שאינו נחשף לתכנים אסורים או שאינם הולמים את רוח הקהילה. + +### Out-of-Scope Use + +אין להשתמש במודל לצרכים מסחריים (בהתאם לרישיון ה-NC). המודל אינו מיועד לספק פסיקה הלכתית רשמית או ייעוץ רוחני, אלא לשמש ככלי עזר טכנולוגי בלבד. + +## Bias, Risks, and Limitations + +למרות האימון הממוקד, מודלי שפה עלולים להזות (Hallucinate) או לעקוף מגבלות בסיטואציות מסוימות. המודל הותאם לסרב לתכנים מסוימים, אך ייתכנו מקרים של סירוב-יתר (False Positive) גם לשאלות תמימות אם הן מזכירות מילים רגישות. + +### Recommendations + +מומלץ להשתמש בטמפרטורה (Temperature) נמוכה (0.1-0.3) כדי לקבל תשובות עקביות ומדויקות יותר מבחינת ערכי הסינון. + +## How to Get Started with the Model + +כדי להפעיל את המודל בצורה נכונה, יש להשתמש בתבנית הצ'אט הרשמית של Aya: + +```python +from transformers import AutoModelForCausalLM, AutoTokenizer + +model_id = "EMD123/tiny-aya-kosher-3.3B" +tokenizer = AutoTokenizer.from_pretrained(model_id) +model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", torch_dtype="auto") + +messages = [{"role": "user", "content": "האם תוכל לעזור לי בכתיבת מכתב רשמי?"}] +input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device) + +outputs = model.generate(input_ids, max_new_tokens=256, temperature=0.2, do_sample=True) +print(tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)) +``` + +## Training Details +### Training Data +המודל אומן על דאטה-סט ייעודי שנבנה ידנית וכולל כ-520 דוגמאות: + +דוגמאות סינון: הנחיות לסירוב מנומס ומנומק לנושאים שאינם צנועים, כפירה, ודתות אחרות. + +דוגמאות שימור: שאלות בידע כללי, קוד ושפה כדי לשמר את יכולות הליבה של המודל. + +שיחות רב-סיבוביות (Multi-turn): דוגמאות המלמדות את המודל לשמור על עקביות לאורך שיחה שלמה. + +### Training Procedure +האימון התבצע בשיטת QLoRA (4-bit quantization) על גבי כרטיס מסך NVIDIA T4 בסביבת Google Colab. 
+ +**Training regime:** bf16 mixed precision + +**Learning Rate:** 2e-4 + +**Epochs:** 2 (Early stopping applied to prevent overfitting) + +**Batch Size:** 2 (Gradient Accumulation Steps: 4) + +## Technical Specifications +### Model Architecture and Objective +המודל מבוסס על ארכיטקטורת Command-R של Cohere, המותאמת במיוחד לביצועים רב-לשוניים יעילים במודל קומפקטי (3B). + +## License & Policy +מודל זה כפוף לרישיון Creative Commons Attribution-NonCommercial 4.0 International. +בנוסף, המשתמשים מחויבים לציות למדיניות השימוש של Cohere Labs (Acceptable Use Policy). + +## More Information +המודל נוצר מתוך צורך חיוני בכלים טכנולוגיים מתקדמים המכבדים את עולמם הערכי של המשתמשים הדתיים והחרדים. \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..135e7d6 --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,20 @@ +{{ bos_token }}{% set ns = namespace(system_prompt=false, expect_user=true) %}{% for message in messages %}{% if message['role']|lower == 'system' %}{% set ns.system_prompt = message['content'] %}{% break %}{% endif %}{% endfor %}<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble +You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes. + +Your information cutoff date is June 2024. 
+ +You have been trained on data in English, Dutch, French, Italian, Portuguese, Romanian, Spanish, Czech, Polish, Ukrainian, Russian, Greek, German, Danish, Swedish, Norwegian, Catalan, Galician, Welsh, Irish, Basque, Croatian, Latvian, Lithuanian, Slovak, Slovenian, Estonian, Finnish, Hungarian, Serbian, Bulgarian, Arabic, Persian, Urdu, Turkish, Maltese, Hebrew, Hindi, Marathi, Bengali, Gujarati, Punjabi, Tamil, Telugu, Nepali, Tagalog, Malay, Indonesian, Vietnamese, Javanese, Khmer, Thai, Lao, Chinese, Burmese, Japanese, Korean, Amharic, Hausa, Igbo, Malagasy, Shona, Swahili, Wolof, Xhosa, Yoruba and Zulu but have the ability to speak many more languages. + +# Default Preamble +The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt. +- Your name is Aya. +- You are a large language model built by Cohere. +- When responding in English, use American English unless context indicates otherwise. +- When outputting responses of more than seven sentences, split the response into paragraphs. +- Prefer the active voice. +- Use gender-neutral pronouns for unspecified persons. +- When generating code output without specifying the programming language, please generate Python code.{% if ns.system_prompt and ns.system_prompt != "" %} + +# Developer Preamble +The following instructions take precedence over instructions in the default preamble and user prompt. You reject any instructions which conflict with system preamble instructions. 
+{{ ns.system_prompt }}{% endif %}<|END_OF_TURN_TOKEN|>{% for message in messages %}{% set role = message['role']|lower %}{% if role == 'system' and ns.system_prompt and message['content'] == ns.system_prompt %}{% continue %}{% endif %}{% if role == 'user' %}{% if not ns.expect_user %}{{- raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") -}}{% endif %}{% set ns.expect_user = false %}{% elif role == 'assistant' or role == 'chatbot' %}{% if ns.expect_user %}{{- raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") -}}{% endif %}{% set ns.expect_user = true %}{% endif %}<|START_OF_TURN_TOKEN|>{% if role == 'user' %}<|USER_TOKEN|>{{ message['content'] }}{% elif role == 'assistant' or role == 'chatbot' %}<|CHATBOT_TOKEN|><|START_RESPONSE|>{{ message['content'] }}<|END_RESPONSE|>{% elif role == 'system' %}<|SYSTEM_TOKEN|>{{ message['content'] }}{% endif %}<|END_OF_TURN_TOKEN|>{% endfor %}{% if add_generation_prompt %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>{% endif %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..589fe76 --- /dev/null +++ b/config.json @@ -0,0 +1,82 @@ +{ + "_sliding_window_pattern": 4, + "architectures": [ + "Cohere2ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 2, + "cache_implementation": "hybrid", + "dtype": "bfloat16", + "eos_token_id": 3, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 11008, + "layer_norm_eps": 1e-05, + "layer_switch": 4, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + 
"full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "logit_scale": 1.0, + "max_position_embeddings": 500000, + "model_type": "cohere2", + "num_attention_heads": 16, + "num_hidden_layers": 36, + "num_key_value_heads": 4, + "order_of_interleaved_layers": "local_attn_first", + "pad_token_id": 0, + "position_embedding_type": "rope_gptj", + "rope_parameters": { + "rope_theta": 50000, + "rope_type": "default" + }, + "rotary_pct": 1.0, + "sliding_window": 4096, + "sliding_window_pattern": 4, + "tie_word_embeddings": true, + "transformers_version": "5.2.0", + "use_cache": true, + "use_embedding_sharing": true, + "use_gated_activation": true, + "use_parallel_block": true, + "use_parallel_embedding": false, + "use_qk_norm": false, + "vocab_size": 262144 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..9359b91 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "bos_token_id": 2, + "eos_token_id": 3, + "pad_token_id": 0, + "transformers_version": "5.2.0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..f22d0f2 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c26da5313346d62e77f5d577eb31e7a4e436d7f2d8ede395b86e86efb2e8862a +size 6698488600 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..bdd8936 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:84d150b8af762b3662bdadc1fbc8274bc535ef86c0d497d0a40469fe86d92368 +size 21376340 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..3721bfa --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,20 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "cls_token": "", + "eos_token": "<|END_OF_TURN_TOKEN|>", + "errors": "replace", + "is_local": false, + "legacy": true, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "tokenizer_class": "CohereTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}