commit 00b5d770d033172634c43d74959de1415c266665 Author: ModelHub XC Date: Sat Jun 6 23:45:17 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: DireDreadlord/GemCod-Jade-270M Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..ddf4004 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,39 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text +gemcod_logo.png filter=lfs diff=lfs merge=lfs -text 
+gemcod_logo_b.png filter=lfs diff=lfs merge=lfs -text +gemcod_logo_c.png filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..9254877 --- /dev/null +++ b/README.md @@ -0,0 +1,123 @@ +--- +license: gemma +datasets: +- DireDreadlord/magicoder-glaive-code-instruct +language: +- en +base_model: +- google/gemma-3-270m-it +pipeline_tag: text-generation +tags: +- text-generation-inference +- code +- gemma3 +- SLM +- chat +--- + + +# GemCod270M - Jade (gemma-270m-it-code v4.1.0) + +![GemCod logo](./gemcod_logo_c.png) + +GemCod is a lightweight code generation model fine-tuned using SFT on the base gemma-3-270m-it model (https://huggingface.co/google/gemma-3-270m-it). It offers accurate and quick(ish) code snippet and long-form code generation in all major programming languages. +Its small size (270M parameters) allows it to run comfortably on laptop-grade GPUs. + +The Jade model serves as an upgrade from the previous Aquamarine model (https://huggingface.co/DireDreadlord/GemCod-codegen-270M), as it provides facilities for superior long-form code generation and explainability of generated snippets whilst keeping the inference time and space requirements roughly the same. + +This model also offers rudimentary Q/A and subject matter expert capabilities on code-related subjects. 
+ +--- + + +**Estimated parameters:** ~270M + +**Architecture:** Gemma3 + +**Intended use:** Code snippet and long-form code generation from natural language + +--- + + +## Training data +- Source: magicoder-glaive-code-instruct dataset (https://huggingface.co/datasets/DireDreadlord/magicoder-glaive-code-instruct) +- Rows: ~350,000 rows templated with a custom .jinja chat format +- Training: trained for 3,000 steps on an RTX 3050 (4GB VRAM) + + +## Usage + +Install requirements: + +```bash +pip install -r requirements.txt +pip install transformers datasets accelerate safetensors +``` + + +## Usage (Hugging Face Hub) +You can load it directly from Hugging Face: + +```python +import torch +from transformers import AutoTokenizer, AutoModelForCausalLM + +device = "cuda" if torch.cuda.is_available() else "cpu" +tokenizer = AutoTokenizer.from_pretrained("DireDreadlord/GemCod-Jade-270M") +model = AutoModelForCausalLM.from_pretrained("DireDreadlord/GemCod-Jade-270M") +model.to(device) +model.eval() +model.resize_token_embeddings(len(tokenizer)) + + +if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + +chat_template = """{% for message in messages %}{% if message['role'] == 'user' %}User: {{ message['content'] }} +{% elif message['role'] == 'assistant' %}Assistant: {{ message['content'] }} +{% endif %}{% endfor %}""" +tokenizer.chat_template = chat_template + +def generate_code(prompt, max_tokens=512) -> str: + messages = [ + { + "role": "user", + "content": prompt + } + ] + + formatted_prompt = tokenizer.apply_chat_template( + messages, + tokenize=False, + add_generation_prompt=True + ) + + inputs = tokenizer(formatted_prompt, return_tensors="pt").to(device) + input_length = inputs["input_ids"].shape[1] + + with torch.no_grad(): + outputs = model.generate( + **inputs, + max_new_tokens=max_tokens, + do_sample=False, + num_beams=1, + pad_token_id=tokenizer.eos_token_id, + eos_token_id=tokenizer.eos_token_id, + use_cache=False, + ) + + generated_tokens = outputs[0][input_length:] + generated_text = 
tokenizer.decode(generated_tokens, skip_special_tokens=True) + return generated_text + + +prompt = "WAP to train a sklearn model to predict the price of a house based on its size and location" +print("Prompt: ", prompt) + +result = generate_code(prompt, 512) +print(result) +``` + + +## Limitations +- This model is for experimental use only; use it accordingly and under the terms of its license. \ No newline at end of file diff --git a/chat_template.jinja b/chat_template.jinja new file mode 100644 index 0000000..ce831cf --- /dev/null +++ b/chat_template.jinja @@ -0,0 +1,3 @@ +{% for message in messages %}{% if message['role'] == 'user' %}User: {{ message['content'] }} +{% elif message['role'] == 'assistant' %}Assistant: {{ message['content'] }} +{% endif %}{% endfor %} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..9f9540f --- /dev/null +++ b/config.json @@ -0,0 +1,62 @@ +{ + "_sliding_window_pattern": 6, + "architectures": [ + "Gemma3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "attn_logit_softcapping": null, + "bos_token_id": 2, + "dtype": "bfloat16", + "eos_token_id": 1, + "final_logit_softcapping": null, + "head_dim": 256, + "hidden_activation": "gelu_pytorch_tanh", + "hidden_size": 640, + "initializer_range": 0.02, + "intermediate_size": 2048, + "layer_types": [ + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "sliding_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "model_type": "gemma3_text", + "num_attention_heads": 4, + "num_hidden_layers": 18, + "num_key_value_heads": 1, + "pad_token_id": 0, + "query_pre_attn_scalar": 256, + "rms_norm_eps": 1e-06, + 
"rope_parameters": { + "full_attention": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_attention": { + "rope_theta": 10000.0, + "rope_type": "default" + } + }, + "sliding_window": 512, + "tie_word_embeddings": true, + "transformers_version": "5.9.0", + "use_bidirectional_attention": false, + "use_cache": false, + "vocab_size": 262145 +} diff --git a/gemcod_logo.png b/gemcod_logo.png new file mode 100644 index 0000000..c8fcb01 --- /dev/null +++ b/gemcod_logo.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:170a5c4066917f847478e58e36f69b9e0d5f60535c550014b5fb61d851a8d85c +size 2980409 diff --git a/gemcod_logo_b.png b/gemcod_logo_b.png new file mode 100644 index 0000000..d1fc2fc --- /dev/null +++ b/gemcod_logo_b.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:960fe91af4ddfee47b1fed7e74b2a09ee2f8fcd805d031d7b0763045be9f09f2 +size 2215458 diff --git a/gemcod_logo_c.png b/gemcod_logo_c.png new file mode 100644 index 0000000..9b36899 --- /dev/null +++ b/gemcod_logo_c.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74623faa5b943af3ada82300b8b3058eac1eede3940ec712a3f1f3424538e473 +size 2675237 diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..1f07995 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "bos_token_id": 2, + "cache_implementation": "hybrid", + "do_sample": true, + "eos_token_id": [ + 1, + 106 + ], + "pad_token_id": 0, + "top_k": 64, + "top_p": 0.95, + "transformers_version": "5.9.0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..f4e52a9 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b61f3d74922e78e5456f717c587b90b27a1341333a4e31ef9fb3c4ebb60abf8e +size 536224336 diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..4ee8a29 --- /dev/null +++ b/requirements.txt @@ 
-0,0 +1,13 @@ +torch +numpy +tiktoken +datasets +transformers +peft +bitsandbytes +tqdm +matplotlib +safetensors +huggingface_hub +accelerate +trl \ No newline at end of file diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..f74d183 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daab2354f8a74e70d70b4d1f804939b68a8c9624dd06cb7858e52dd8970e9726 +size 33384567 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..55d93ef --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,25 @@ +{ + "backend": "tokenizers", + "boi_token": "", + "bos_token": "", + "clean_up_tokenization_spaces": false, + "eoi_token": "", + "eos_token": "", + "image_token": "", + "is_local": false, + "local_files_only": false, + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "model_specific_special_tokens": { + "boi_token": "", + "eoi_token": "", + "image_token": "" + }, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": null, + "spaces_between_special_tokens": false, + "tokenizer_class": "GemmaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..6b21dec --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b695394fb85299a23b4a314839592d60dac7bb2ab52e04af5e25d073d47070d +size 5304