From 285d2daef1bed230f78e8cdc5bc8a9e373e2e781 Mon Sep 17 00:00:00 2001
From: ModelHub XC <noreply@modelhub.org.cn>
Date: Thu, 18 Jun 2026 06:29:16 +0800
Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?=
 =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?=
 =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Model: 0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther
Source: Original Platform
---
 .gitattributes                 |  40 +++
 Qwen2.5-Coder-0.5B-F16.gguf    |   3 +
 Qwen2.5-Coder-0.5B-Q3_K_M.gguf |   3 +
 Qwen2.5-Coder-0.5B-Q4_K_M.gguf |   3 +
 Qwen2.5-Coder-0.5B-Q5_K_M.gguf |   3 +
 Qwen2.5-Coder-0.5B-Q6_K.gguf   |   3 +
 README.md                      | 478 +++++++++++++++++++++++++++++++++
 config.json                    |  54 ++++
 generation_config.json         |  14 +
 model.safetensors              |   3 +
 10 files changed, 604 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 Qwen2.5-Coder-0.5B-F16.gguf
 create mode 100644 Qwen2.5-Coder-0.5B-Q3_K_M.gguf
 create mode 100644 Qwen2.5-Coder-0.5B-Q4_K_M.gguf
 create mode 100644 Qwen2.5-Coder-0.5B-Q5_K_M.gguf
 create mode 100644 Qwen2.5-Coder-0.5B-Q6_K.gguf
 create mode 100644 README.md
 create mode 100644 config.json
 create mode 100644 generation_config.json
 create mode 100644 model.safetensors

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..6e9404c
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,40 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+Qwen2.5-Coder-0.5B-F16.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen2.5-Coder-0.5B-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen2.5-Coder-0.5B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen2.5-Coder-0.5B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen2.5-Coder-0.5B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
diff --git a/Qwen2.5-Coder-0.5B-F16.gguf b/Qwen2.5-Coder-0.5B-F16.gguf
new file mode 100644
index 0000000..f92d854
--- /dev/null
+++ b/Qwen2.5-Coder-0.5B-F16.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf690960971be6647cb3de4ae2c1f7dfbf4668dbeb345f07cf07b324480751da
+size 994156224
diff --git a/Qwen2.5-Coder-0.5B-Q3_K_M.gguf b/Qwen2.5-Coder-0.5B-Q3_K_M.gguf
new file mode 100644
index 0000000..d8e0ca0
--- /dev/null
+++ b/Qwen2.5-Coder-0.5B-Q3_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5aac54b1e163b174cc1a53dc3d3b51429f801acb5ba53071c95f1551ae73f56d
+size 355465920
diff --git a/Qwen2.5-Coder-0.5B-Q4_K_M.gguf b/Qwen2.5-Coder-0.5B-Q4_K_M.gguf
new file mode 100644
index 0000000..1c8dba6
--- /dev/null
+++ b/Qwen2.5-Coder-0.5B-Q4_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74ac0b4b3a041d80c2780f95d762ea24b88f7e0d070af5a0943dabbbc98b5b31
+size 397807296
diff --git a/Qwen2.5-Coder-0.5B-Q5_K_M.gguf b/Qwen2.5-Coder-0.5B-Q5_K_M.gguf
new file mode 100644
index 0000000..16a2d2c
--- /dev/null
+++ b/Qwen2.5-Coder-0.5B-Q5_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68d012074b7f9077e8d80f84155c72bee28a018a6357d6c7418d655b738649ae
+size 420085440
diff --git a/Qwen2.5-Coder-0.5B-Q6_K.gguf b/Qwen2.5-Coder-0.5B-Q6_K.gguf
new file mode 100644
index 0000000..ff5db46
--- /dev/null
+++ b/Qwen2.5-Coder-0.5B-Q6_K.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40b7d9323010f2e4293601a3da433b0644337dc7cf82974cef437db86aecdd23
+size 505735872
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..612bc0a
--- /dev/null
+++ b/README.md
@@ -0,0 +1,478 @@
+---
+library_name: transformers
+tags:
+- text-generation
+- qwen2.5-coder
+- rl-swarm
+- genrl-swarm
+- grpo
+- gensyn
+- trl
+- code-generation
+- programming
+- continuous-training
+- reinforcement-learning
+- safetensors
+- gguf
+- math
+- logic
+- conversational
+- text-generation-inference
+- I am tall_tame_panther
+- python
+- agent
+license: mit
+language:
+- en
+base_model:
+- Qwen/Qwen2.5-Coder-0.5B-Instruct
+---
+
+<h1 align="center">Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm Agent-ID (tall_tame_panther)</h1>
+
+<h2 align="center">Gensyn RL-Swarm: Training & GGUF Quantized LLMs for Inference</h2>
+
+<p align="center">
+<a href="https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Model-blue" alt="Model"></a>
+<a href="https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther/tree/main"><img src="https://img.shields.io/badge/GGUF-Available-8A2BE2" alt="GGUF"></a>
+<img src="https://img.shields.io/badge/LLama.cpp-Compatible-orange" alt="llama.cpp">
+<a href="https://gensyn.ai"><img src="https://img.shields.io/badge/Trained%20with-Gensyn%20RL--Swarm-pink" alt="Gensyn"></a>
+<a href="https://github.com/gensyn-ai/rl-swarm/releases"><img src="https://img.shields.io/github/v/release/gensyn-ai/rl-swarm?label=Version&color=FF0069" alt="version"></a>
+<a href="https://github.com/gensyn-ai/rl-swarm/blob/main/LICENSE.TXT"><img src="https://img.shields.io/badge/License-MIT-green" alt="License"></a>
+</p>
+
+<div align="center">
+
+[![Gensyn](https://img.shields.io/badge/Powered%20by-Gensyn%20AI-pink?style=for-the-badge)](https://gensyn.ai)
+
+</div>
+
+---
+
+## Model Overview
+
+This model is an **experimental (advanced) mode** build: a continuously trained `Qwen2.5-Coder-0.5B-Instruct`, fine-tuned using the **Gensyn RL-Swarm** framework with **GRPO (Group Relative Policy Optimization)** for enhanced code generation capabilities, and also published in **GGUF (llama.cpp)** format. **Note: Current training focuses on programming challenges with adaptive weighted sampling**.
+
+- **Agent ID:** `tall_tame_panther`
+- **Training Status:** 🟢 LIVE - Model updates automatically every 5-10 minutes
+- **Auto-Sync GGUF Pipeline Status:** 🟢 LIVE - Commits update automatically every hour
+- **Current Progress:** Round 13,533+ / 100,000 (13.53%)
+- **Framework Version:** Gensyn RL-Swarm v0.7.0
+- **Contract:** SwarmCoordinator v0.4.2
+
+## Key Features
+
+- **Real-time Training**: Continuous learning with distributed RL across Gensyn swarm network
+- **Adaptive System**: Dynamic quality enhancement and dataset weighting for optimal learning
+- **Multi-domain Coding**: Trained on MBPP and CodeContests datasets with adaptive sampling
+- **GGUF Support**: Multiple quantized formats available (F16, Q3_K_M, Q4_K_M, Q5_K_M, Q6_K)
+- **llama.cpp Compatible**: Ready for edge deployment and local inference
+- **BF16 Precision**: Trained with bfloat16 for optimal performance
+- **TGI Compatible**: Supports Text Generation Inference for production deployment
+- **Chat Format Support**: Inherits Qwen2.5 chat template for conversational use
+
+## Training Data
+
+The model is trained on a composite dataset with adaptive weighted sampling strategy:
+
+| Dataset | Initial Weight | Adaptive Range | Focus Area |
+|---------|----------------|----------------|------------|
+| MBPP | 5 | 4-6 | Basic Python programming problems with test cases |
+| CodeContests | 5 | 4-6 | Competitive programming challenges |
+
+**Total Dataset Size:** Streaming datasets with infinite iteration  
+**Training Samples per Round:** 2  
+**Evaluation:** Real-time via Swarm Coordination, using an Ollama-based evaluator or, alternatively, the Judge API
+
+## Adaptive Sampling Strategy
+
+> "When the solvers perform well, the proposer automatically increases the difficulty to keep challenging solvers to get better over time." - CodeZero-blog
+
+```diff
+The implementation features an adaptive sampling system that adjusts dataset weights based on performance
+The system monitors performance metrics every 5 rounds and adjusts the dataset weights to maintain optimal learning balance
+- Update dataset weights based on recent performance
+- Calculate recent average performance for each dataset
+- Adjust/use weighted sampling if adaptive, based on the performance difference
+- Performance better on MBPP (Mostly Basic Python Problems)
+- Performance better on CodeContests
+- Update dataset weights every 5 rounds & keep them balanced
+```
+
+## Adaptive Reward System
+### Quality Enhanced Implementation
+
+
+> "Rewards are derived from multiple lightweight checks, ranging from code validity and formatting to alignment with the problem statement, combined into a single interpretable score." - CodeZero-blog
+
+```diff
+The reward system includes a quality enhancement mechanism that evaluates code structure and documentation
+- Calculate a quality enhancement for well-structured code
+- Documentation enhanced
+- Structure enhanced
+- Algorithmic efficiency (simple heuristic)
+- Scale with base reward to avoid inflation
+```
+
+### Adaptive Threshold System
+
+
+```diff
+The system also includes an adaptive threshold mechanism that adjusts based on recent performance
+- Function adaptive threshold based on recent performance
+- Performance quality data is consistently high
+```
+
+## Quick Performance Simulation
+### Reward Comparison
+
+Based on our simulation with 1000 samples, the adaptive reward system shows significant improvement
+
+| System | MBPP Avg Reward | CodeContests Avg Reward | Overall Avg Reward | Improvement |
+|---------|----------------|------------------------|-------------------|-------------|
+| Original | 0.234 | -0.156 | 0.039 | - |
+| Adaptive | 0.312 | -0.098 | 0.107 | ~174% |
+
+### Training Progress
+
+Based on the logs provided, the model shows consistent progress:
+
+Train/loss metrics visualized with Weights & Biases (WandB)
+- Soon LIVE!
+
+```
+[2025-11-14 04:22:50,632][genrl.logging_utils.global_defs][INFO] - __ Joining round: 13053
+[2025-11-14 04:23:50,633][genrl.logging_utils.global_defs][INFO] - Starting round: 13053/100000.
+Map: 100%|______________________________________| 1/1 [00:00<00:00, 158.65 examples/s]
+Map: 100%|______________________________________| 1/1 [00:00<00:00, 191.92 examples/s]
+[2025-11-14 04:25:12,646][genrl.logging_utils.global_defs][INFO] - pushing model to huggingface
+Processing Files (1 / 1)      : 100%|___|  988MB /  988MB, 94.3MB/s
+New Data Upload               : 100%|___|  983MB /  983MB, 94.3MB/s  
+.....kpb5lid/model.safetensors: 100%|___|  988MB /  988MB, 94.3MB/s
+[2025-11-14 04:27:01,877][genrl.logging_utils.global_defs][INFO] - Already finished round: 13053. Next check in 160.0s.
+```
+
+## Quick Start Inferences
+
+### Standard Transformers
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model = AutoModelForCausalLM.from_pretrained(
+    "0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther",
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained("0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther")
+prompt = "Write a function to calculate the factorial of a number."
+inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+outputs = model.generate(**inputs, max_length=256, temperature=0.7, top_p=0.8)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+```
+
+### Chat Format (Conversational)
+
+```python
+from transformers import AutoModelForCausalLM, AutoTokenizer
+model = AutoModelForCausalLM.from_pretrained("0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther")
+tokenizer = AutoTokenizer.from_pretrained("0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther")
+messages = [
+    {"role": "system", "content": "You are an expert Python programmer."},
+    {"role": "user", "content": "Write a function to check if a string is a palindrome."}
+]
+text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+inputs = tokenizer(text, return_tensors="pt")
+outputs = model.generate(**inputs, max_length=512)
+print(tokenizer.decode(outputs[0]))
+```
+
+### Text Generation Inference (TGI)
+
+```bash
+docker run -d --gpus all \
+  -p 8080:80 \
+  -v $PWD/data:/data \
+  ghcr.io/huggingface/text-generation-inference:latest \
+  --model-id 0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther \
+  --max-input-length 4096 \
+  --max-total-tokens 8192
+```
+
+### GGUF with LLAMA.CPP
+
+```bash
+# Download quantized model (recommended: Q4_K_M)
+wget https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther/resolve/main/Qwen2.5-Coder-0.5B-Q4_K_M.gguf
+# Run inference
+./llama-cli -m Qwen2.5-Coder-0.5B-Q4_K_M.gguf \
+  -p "Write a function to implement binary search in Python." \
+  --temp 0.7 --top-p 0.8
+```
+
+### Ollama
+
+```bash
+# Create Modelfile
+cat > Modelfile << 'EOF'
+FROM ./0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther/Qwen2.5-Coder-0.5B-Q4_K_M.gguf
+PARAMETER temperature 0.7
+PARAMETER top_p 0.8
+PARAMETER top_k 20
+SYSTEM "You are an expert Python programmer who writes clean, documented code."
+EOF
+# Create and run
+ollama create qwen2.5-coder-swarm -f Modelfile
+ollama run qwen2.5-coder-swarm "Write a function to calculate the factorial of a number."
+```
+
+## Available GGUF Quantization
+
+| Format | Size | Precision | Use Case | Download |
+|--------|------|-----------|----------|----------|
+| Safetensors (BF16) | 988 MB | BF16 | Full precision training/fine-tuning | `model.safetensors` |
+| GGUF F16 | 994 MB | FP16 | High quality inference | `Qwen2.5-Coder-0.5B-F16.gguf` |
+| GGUF Q6_K | 506 MB | 6-bit | High quality compression | `Qwen2.5-Coder-0.5B-Q6_K.gguf` |
+| GGUF Q5_K_M | 420 MB | 5-bit | Balanced quality/size | `Qwen2.5-Coder-0.5B-Q5_K_M.gguf` |
+| GGUF Q4_K_M | 398 MB | 4-bit | **Recommended** for production | `Qwen2.5-Coder-0.5B-Q4_K_M.gguf` |
+| GGUF Q3_K_M | 355 MB | 3-bit | Smallest, fastest | `Qwen2.5-Coder-0.5B-Q3_K_M.gguf` |
+
+> All GGUF formats are **llama.cpp compatible**, ready to use for **chat inference**, and auto-updated hourly.
+
+
+## Chat Format & Conversational
+
+This model inherits **Qwen2.5's chat template** for structured conversations.
+
+### Format Structure
+
+```
+<|im_start|>system
+{system_message}
+<|im_end|>
+<|im_start|>user
+{user_message}
+<|im_end|>
+<|im_start|>assistant
+{assistant_response}
+<|im_end|>
+```
+
+### Chat Template Features
+
+- **System Instructions**: Guide model behavior with system messages
+- **Multi-turn Dialogue**: Maintains conversation context
+- **Tool Calling**: Supports function calling (if enabled in training)
+- **Code Generation**: Optimized for generating Python code
+
+**Note**: While the model supports the chat format structurally, optimal conversational performance depends on whether the training data included formatted dialogues. Current training focuses on **programming challenges**.
+
+### Gensyn RL-Swarm Quick-Architecture
+
+```diff
+Training Framework:
+- Method: GRPO (Group Relative Policy Optimization)
+- Base Model: Qwen/Qwen2.5-Coder-0.5B-Instruct
+- Training Regime: bfloat16 mixed precision
+- Max Rounds: 100000
+- Update Frequency: Every 5-10 minutes
+- Generations per Round: 2
+- Batch size: Combine
+- Tree-based Model: 2 tree
+- Seed: 42
+Blockchain Integration:
+- Network: Gensyn Testnet
+- Chain ID: 685685
+- Contract: SwarmCoordinator v0.4.2
+Swarm Communication:
+- Framework: Hivemind P2P Backend
+- Initial Peers: 3 bootnodes
+- Beam Size: 10
+Reward System:
+- Manager: RewardManager (SwarmGameManager/CodeGenerationRewards)
+- Reward Function: Adaptive with quality enhancement
+- Evaluator: Ollama (qwen2.5-coder:1.5b-instruct)
+- Judge API: https://codezero-judge.gensyn.ai
+```
+
+## Model Capabilities
+
+This model excels at:
+
+1. **Basic Python Programming**: Functions, loops, conditionals, data structures
+2. **Algorithm Implementation**: Sorting, searching, graph algorithms
+3. **String Manipulation**: Pattern matching, parsing, formatting
+4. **Mathematical Functions**: Calculations, conversions, formulas
+5. **Code Documentation**: Writing clear, commented functions
+6. **Problem Solving**: Breaking down complex problems into manageable steps
+
+## Limitations
+
+- **Specialized Domain**: Optimized for programming challenges; may underperform on creative writing
+- **Training in Progress**: Weights update every 5-10 minutes; performance varies
+- **Scale**: 0.5B parameters - suitable for edge but not SOTA for complex programming
+- **Experimental**: Decentralized RL training; behavior less predictable than supervised models
+- **Context**: Best performance within 4K tokens (full 32K supported)
+
+## Update Schedule
+
+| Format | Frequency | Trigger |
+|--------|-----------|---------|
+| Safetensors (BF16) | Every 5-10 min | Automatic via RL-Swarm |
+| GGUF (all formats) | Every 3 hours | Auto-conversion pipeline |
+
+**Auto-Conversion Pipeline:**
+
+1. Monitors repo for new training commits
+2. Downloads latest `model.safetensors`
+3. Converts to F16 GGUF base
+4. Quantizes to Q3_K_M, Q4_K_M, Q5_K_M, Q6_K
+5. Standardizes formats
+
+Check commit history for exact timestamps.
+
+### Architecture Components
+
+1. **Game Manager**: Orchestrates training rounds and swarm coordination
+2. **Trainer**: GRPO implementation for policy optimization
+3. **Data Manager**: Dataset loading with adaptive weighted sampling
+4. **Reward Manager**: Computes rewards via Ollama evaluator with quality enhancement
+5. **Coordinator**: Blockchain integration for swarm state
+6. **P2P Backend**: Hivemind DHT for model sharing
+
+### Training Process
+
+```
+1. Agent joins swarm via P2P network
+2. Coordinator assigns round via smart contract
+3. Agent samples data from adaptive weighted datasets
+4. Model generates 2 responses
+5. Ollama evaluator assesses and assigns rewards with quality enhancement
+6. GRPO updates policy based on rewards
+7. Updated model shared via DHT
+8. Best checkpoint saved to HuggingFace
+9. Repeat
+```
+
+### Decentralization Benefits
+
+- **Fault Tolerance**: Multiple agents; no single point of failure
+- **Diverse Exploration**: Different agents explore different strategies
+- **Collective Intelligence**: Agents learn from each other
+- **Transparent**: All rounds verified on-chain
+
+### Software Stack
+
+- **Framework**: Gensyn RL-Swarm v0.7.0
+- **Library**: transformers v4.57.1
+- **P2P**: hivemind
+- **Blockchain**: Gensyn testnet
+- **Config**: Hydra + OmegaConf
+- **Logging**: WandB integration
+
+### Hardware Requirements
+
+**Training GPU:**
+- GPU: NVIDIA 4090 24GB+ (BF16 training)
+- RAM: 16GB+
+- Cores: 10+
+- Storage: 50GB SSD
+- Network: High bandwidth for P2P
+
+**Training (CPU-Optimized):**
+- CPU: INTEL or AMD
+- Cores: 10+
+- RAM: 16GB+
+- Storage: 50GB SSD
+- Network: High bandwidth for P2P
+ 
+**Inference:**
+- Safetensors: 8GB VRAM (GPU) / 16GB RAM (CPU)
+- GGUF Q4_K_M: 2GB VRAM (GPU) / 4GB RAM (CPU)
+- GGUF Q3_K_M: 3GB RAM (CPU-only)
+
+### Training Progress Metrics
+
+| Metric | Value | Target |
+|--------|-------|--------|
+| Completed Rounds | 13,533+ | 100,000 |
+| Training Progress | 13.53% | 100% |
+| Update Frequency | 5-10 min | Continuous |
+
+**Note**: **average\@k:** Average performance across `k` attempts, measuring consistency. **pass\@k:** Probability of at least one correct solution in `k` attempts, measuring capability. Current metrics track training rounds completed in the decentralized swarm.
+
+### Adaptive Reward Performance
+
+Our adaptive reward system has shown an approximately 174% improvement in reward scores compared to the baseline system:
+
+```
+Original:
+  Overall Avg Reward: 0.039
+  MBPP Avg Reward: 0.234
+  CodeContests Avg Reward: -0.156
+Adaptive:
+  Overall Avg Reward: 0.107
+  MBPP Avg Reward: 0.312
+  CodeContests Avg Reward: -0.098
+Improvement: 0.068 (~174% increase)
+```
+
+## Citation
+
+```
+@misc{qwen2.5-coder-gensyn-swarm-2025,
+  author = {0xgrey},
+  title = {Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm: Continuous RL Training on Distributed Swarm with Adaptive Rewards},
+  year = {2025},
+  publisher = {HuggingFace},
+  howpublished = {\url{https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther}},
+  note = {Agent ID: tall\_tame\_panther}
+}
+@misc{gensyn-rl-swarm-2025,
+  title = {Gensyn RL-Swarm: Decentralized Reinforcement Learning Framework},
+  author = {Gensyn AI},
+  year = {2025},
+  url = {https://gensyn.ai}
+}
+@misc{codezero-2025,
+  title = {CodeZero: A Collaborative Coding Environment for Distributed RL},
+  author = {Gensyn AI},
+  year = {2025},
+  url = {https://docs.gensyn.ai/testnet/rl-swarm/how-it-works/codezero}
+}
+```
+
+## References
+
+- **Gensyn Documentation**: https://docs.gensyn.ai/
+- **Gensyn GitHub**: https://github.com/gensyn-ai
+- **RL-Swarm Contracts**: https://github.com/gensyn-ai/rl-swarm-contracts
+- **Qwen2.5-Coder Model Card**: https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct
+- **MBPP Dataset**: https://huggingface.co/datasets/google-research-datasets/mbpp
+- **CodeContests Dataset**: https://huggingface.co/datasets/deepmind/code_contests
+- **arXiv:1910.09700**: ML Carbon Emissions methodology
+
+
+## Contact
+
+- **Developer**: 0xgrey
+- **Agent ID**: tall_tame_panther
+- **Community**: [Gensyn Discord](https://discord.gg/gensyn)
+
+
+**⚠️ Important**: This is a continuously trained model. For reproducibility, specify commit hash:
+
+```
+git clone https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther
+cd Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther
+git checkout <commit-hash>
+```
+
+---
+
+<div align="center">
+
+**Trained with 🩷 using Gensyn RL-Swarm**
+
+[![Gensyn](https://img.shields.io/badge/Powered%20by-Gensyn%20AI-pink?style=for-the-badge)](https://gensyn.ai)
+
+</div>
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..a54efe6
--- /dev/null
+++ b/config.json
@@ -0,0 +1,54 @@
+{
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 896,
+  "initializer_range": 0.02,
+  "intermediate_size": 4864,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 24,
+  "model_type": "qwen2",
+  "num_attention_heads": 14,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "transformers_version": "4.57.1",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..fc71a15
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,14 @@
+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "repetition_penalty": 1.05,
+  "temperature": 0.7,
+  "top_k": 20,
+  "top_p": 0.8,
+  "transformers_version": "4.57.1"
+}
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000..49e1e63
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eefaa4c897b452f74844a82af0ba36f9f86e555bb67fa9062238340bf05a0369
+size 988097824