From 140547d31e014f800f1f69386b1a8a5b84164c75 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 19 May 2026 20:03:58 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: harshit1308/smolified-protein-content Source: Original Platform --- .gitattributes | 36 ++++++++++++++++++++++ README.md | 69 +++++++++++++++++++++++++++++++++++++++++++ chat_template.jinja | 47 +++++++++++++++++++++++++++++ config.json | 64 +++++++++++++++++++++++++++++++++++++++ model.safetensors | 3 ++ tokenizer.json | 3 ++ tokenizer_config.json | 25 ++++++++++++++++ 7 files changed, 247 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 chat_template.jinja create mode 100644 config.json create mode 100644 model.safetensors create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text 
+*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..0c25474 --- /dev/null +++ b/README.md @@ -0,0 +1,69 @@ +--- +license: apache-2.0 +language: +- en +tags: +- text-generation-inference +- transformers +- smolify +- dslm +pipeline_tag: text-generation +inference: + parameters: + temperature: 1 + top_p: 0.95 + top_k: 64 +--- + +# 🤏 smolified-protein-content + +> **Intelligence, Distilled.** + +This is a **Domain Specific Language Model (DSLM)** generated by the **Smolify Foundry**. + +It has been synthetically distilled from SOTA reasoning engines into a high-efficiency architecture, optimized for deployment on edge hardware (CPU/NPU) or low-VRAM environments. + +## 📦 Asset Details +- **Origin:** Smolify Foundry (Job ID: `835f8c65`) +- **Architecture:** gemma-3-270m +- **Training Method:** Proprietary Neural Distillation +- **Optimization:** 4-bit Quantized / FP16 Mixed +- **Dataset:** [Link to Dataset](https://huggingface.co/datasets/harshit1308/smolified-protein-content) + +## 🚀 Usage (Inference) +This model is compatible with standard inference backends like vLLM, and Hugging Face Transformers. 
+ +```python +# Example: Running your Sovereign Model +from transformers import AutoModelForCausalLM, AutoTokenizer + +model_id = "harshit1308/smolified-protein-content" +tokenizer = AutoTokenizer.from_pretrained(model_id) +model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto") + +messages = [ + {"role": "system", "content": '''You are a professional nutrition assistant. Provide the estimated protein content for food items based on the quantity specified by the user. Respond with the item name and the total protein amount in grams.'''}, + {"role": "user", "content": '''Whey Protein Powder: 30g'''} +] +text = tokenizer.apply_chat_template( + messages, + tokenize = False, + add_generation_prompt = True, +) +if "gemma-3-270m" == "gemma-3-270m": + text = text.removeprefix('') + +from transformers import TextStreamer +_ = model.generate( + **tokenizer(text, return_tensors = "pt").to(model.device), + max_new_tokens = 1000, + temperature = 1.0, top_p = 0.95, top_k = 64, + streamer = TextStreamer(tokenizer, skip_prompt = True), +) +``` + +## ⚖️ License & Ownership +These model weights are a sovereign asset owned by **harshit1308**. +Generated via [Smolify.ai](https://smolify.ai). 
+ +[](https://smolify.ai) diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..7c7339b --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,47 @@ +{{ bos_token }} +{%- if messages[0]['role'] == 'system' -%} + {%- if messages[0]['content'] is string -%} + {%- set first_user_prefix = messages[0]['content'] + ' + +' -%} + {%- else -%} + {%- set first_user_prefix = messages[0]['content'][0]['text'] + ' + +' -%} + {%- endif -%} + {%- set loop_messages = messages[1:] -%} +{%- else -%} + {%- set first_user_prefix = "" -%} + {%- set loop_messages = messages -%} +{%- endif -%} +{%- for message in loop_messages -%} + {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%} + {{ raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") }} + {%- endif -%} + {%- if (message['role'] == 'assistant') -%} + {%- set role = "model" -%} + {%- else -%} + {%- set role = message['role'] -%} + {%- endif -%} + {{ '' + role + ' +' + (first_user_prefix if loop.first else "") }} + {%- if message['content'] is string -%} + {{ message['content'] | trim }} + {%- elif message['content'] is iterable -%} + {%- for item in message['content'] -%} + {%- if item['type'] == 'image' -%} + {{ '' }} + {%- elif item['type'] == 'text' -%} + {{ item['text'] | trim }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{ raise_exception("Invalid content type") }} + {%- endif -%} + {{ ' +' }} +{%- endfor -%} +{%- if add_generation_prompt -%} + {{ 'model +' }} +{%- endif -%} diff --git a/config.json b/config.json new file mode 100644 index 0000000..bdbb4ff --- /dev/null +++ b/config.json @@ -0,0 +1,64 @@ +{ + "_sliding_window_pattern": 6, + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "bos_token_id": 2, + "torch_dtype": "bfloat16", + "eos_token_id": 106, + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", 
+ "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_name": "unsloth/gemma-3-270m-it", + "model_type": "gemma3_text", + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "full_attention": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_attention": { + "rope_theta": 10000.0, + "rope_type": "default" + } + }, + "sliding_window": 512, + "tie_word_embeddings": true, + "unsloth_fixed": true, + "unsloth_version": "2026.3.11", + "use_bidirectional_attention": false, + "use_cache": false, + "vocab_size": 262144 +} \ No newline at end of file diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..2bd4c55 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c065e8336009e9b8478ced2339610c800f3dfd203e2cf4a722926a137d240b0 +size 536223056 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..899af07 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a74aefb1dc1340a25f29ab8370384b9ed24b2d921d7749ece7bbcfcfdf00d497 +size 33384443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..d38b382 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,25 @@ +{ + "backend": "tokenizers", + "boi_token": "", + "bos_token": "", + 
"clean_up_tokenization_spaces": false, + "eoi_token": "", + "eos_token": "", + "image_token": "", + "is_local": false, + "mask_token": "", + "model_max_length": 32768, + "model_specific_special_tokens": { + "boi_token": "", + "eoi_token": "", + "image_token": "" + }, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": null, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false, + "chat_template": "{{ bos_token }}\n{%- if messages[0]['role'] == 'system' -%}\n {%- if messages[0]['content'] is string -%}\n {%- set first_user_prefix = messages[0]['content'] + '\n\n' -%}\n {%- else -%}\n {%- set first_user_prefix = messages[0]['content'][0]['text'] + '\n\n' -%}\n {%- endif -%}\n {%- set loop_messages = messages[1:] -%}\n{%- else -%}\n {%- set first_user_prefix = \"\" -%}\n {%- set loop_messages = messages -%}\n{%- endif -%}\n{%- for message in loop_messages -%}\n {%- if (message['role'] == 'user') != (loop.index0 % 2 == 0) -%}\n {{ raise_exception(\"Conversation roles must alternate user/assistant/user/assistant/...\") }}\n {%- endif -%}\n {%- if (message['role'] == 'assistant') -%}\n {%- set role = \"model\" -%}\n {%- else -%}\n {%- set role = message['role'] -%}\n {%- endif -%}\n {{ '' + role + '\n' + (first_user_prefix if loop.first else \"\") }}\n {%- if message['content'] is string -%}\n {{ message['content'] | trim }}\n {%- elif message['content'] is iterable -%}\n {%- for item in message['content'] -%}\n {%- if item['type'] == 'image' -%}\n {{ '' }}\n {%- elif item['type'] == 'text' -%}\n {{ item['text'] | trim }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{ raise_exception(\"Invalid content type\") }}\n {%- endif -%}\n {{ '\n' }}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{ 'model\n' }}\n{%- endif -%}\n" +} \ No newline at end of file