From 285d2daef1bed230f78e8cdc5bc8a9e373e2e781 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Thu, 18 Jun 2026 06:29:16 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: 0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther Source: Original Platform --- .gitattributes | 40 +++ Qwen2.5-Coder-0.5B-F16.gguf | 3 + Qwen2.5-Coder-0.5B-Q3_K_M.gguf | 3 + Qwen2.5-Coder-0.5B-Q4_K_M.gguf | 3 + Qwen2.5-Coder-0.5B-Q5_K_M.gguf | 3 + Qwen2.5-Coder-0.5B-Q6_K.gguf | 3 + README.md | 478 +++++++++++++++++++++++++++++++++ config.json | 54 ++++ generation_config.json | 14 + model.safetensors | 3 + 10 files changed, 604 insertions(+) create mode 100644 .gitattributes create mode 100644 Qwen2.5-Coder-0.5B-F16.gguf create mode 100644 Qwen2.5-Coder-0.5B-Q3_K_M.gguf create mode 100644 Qwen2.5-Coder-0.5B-Q4_K_M.gguf create mode 100644 Qwen2.5-Coder-0.5B-Q5_K_M.gguf create mode 100644 Qwen2.5-Coder-0.5B-Q6_K.gguf create mode 100644 README.md create mode 100644 config.json create mode 100644 generation_config.json create mode 100644 model.safetensors diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..6e9404c --- /dev/null +++ b/.gitattributes @@ -0,0 +1,40 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs 
-text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +Qwen2.5-Coder-0.5B-F16.gguf filter=lfs diff=lfs merge=lfs -text +Qwen2.5-Coder-0.5B-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Qwen2.5-Coder-0.5B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Qwen2.5-Coder-0.5B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Qwen2.5-Coder-0.5B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/Qwen2.5-Coder-0.5B-F16.gguf b/Qwen2.5-Coder-0.5B-F16.gguf new file mode 100644 index 0000000..f92d854 --- /dev/null +++ b/Qwen2.5-Coder-0.5B-F16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf690960971be6647cb3de4ae2c1f7dfbf4668dbeb345f07cf07b324480751da +size 994156224 diff --git a/Qwen2.5-Coder-0.5B-Q3_K_M.gguf b/Qwen2.5-Coder-0.5B-Q3_K_M.gguf new file mode 100644 index 0000000..d8e0ca0 --- /dev/null +++ b/Qwen2.5-Coder-0.5B-Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5aac54b1e163b174cc1a53dc3d3b51429f801acb5ba53071c95f1551ae73f56d +size 355465920 diff --git 
a/Qwen2.5-Coder-0.5B-Q4_K_M.gguf b/Qwen2.5-Coder-0.5B-Q4_K_M.gguf new file mode 100644 index 0000000..1c8dba6 --- /dev/null +++ b/Qwen2.5-Coder-0.5B-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ac0b4b3a041d80c2780f95d762ea24b88f7e0d070af5a0943dabbbc98b5b31 +size 397807296 diff --git a/Qwen2.5-Coder-0.5B-Q5_K_M.gguf b/Qwen2.5-Coder-0.5B-Q5_K_M.gguf new file mode 100644 index 0000000..16a2d2c --- /dev/null +++ b/Qwen2.5-Coder-0.5B-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d012074b7f9077e8d80f84155c72bee28a018a6357d6c7418d655b738649ae +size 420085440 diff --git a/Qwen2.5-Coder-0.5B-Q6_K.gguf b/Qwen2.5-Coder-0.5B-Q6_K.gguf new file mode 100644 index 0000000..ff5db46 --- /dev/null +++ b/Qwen2.5-Coder-0.5B-Q6_K.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40b7d9323010f2e4293601a3da433b0644337dc7cf82974cef437db86aecdd23 +size 505735872 diff --git a/README.md b/README.md new file mode 100644 index 0000000..612bc0a --- /dev/null +++ b/README.md @@ -0,0 +1,478 @@ +--- +library_name: transformers +tags: +- text-generation +- qwen2.5-coder +- rl-swarm +- genrl-swarm +- grpo +- gensyn +- trl +- code-generation +- programming +- continuous-training +- reinforcement-learning +- safetensors +- gguf +- math +- logic +- conversational +- text-generation-inference +- I am tall_tame_panther +- python +- agent +license: mit +language: +- en +base_model: +- Qwen/Qwen2.5-Coder-0.5B +--- + +

Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm Agent-ID (tall_tame_panther)

+ +

Gensyn RL-Swarm: Training & GGUF Quantized LLMs for Inference

+ +

+Model +GGUF +llama.cpp +Gensyn +version +License +

+ +
+ +[![Gensyn](https://img.shields.io/badge/Powered%20by-Gensyn%20AI-pink?style=for-the-badge)](https://gensyn.ai) + +
+ +--- + +## Model Overview + +This is an **experimental (advanced) mode** model: a continuously trained `Qwen2.5-Coder-0.5B-Instruct`, fine-tuned using the **Gensyn RL-Swarm** framework with **GRPO (Group Relative Policy Optimization)** and also published in **GGUF (llama.cpp)** format, for enhanced code generation capabilities. **Note: Current training focuses on programming challenges with adaptive weighted sampling**. + +- **Agent ID:** `tall_tame_panther` +- **Training Status:** 🟢 LIVE - Model updates automatically every 5-10 minutes +- **Auto-Sync GGUF Pipeline Status:** 🟢 LIVE - Commits update automatically every hour +- **Current Progress:** Round 13,533+ / 100,000 (13.53%) +- **Framework Version:** Gensyn RL-Swarm v0.7.0 +- **Contract:** SwarmCoordinator v0.4.2 + +## Key Features + +- **Real-time Training**: Continuous learning with distributed RL across Gensyn swarm network +- **Adaptive System**: Dynamic quality enhancement and dataset weighting for optimal learning +- **Multi-domain Coding**: Trained on MBPP and CodeContests datasets with adaptive sampling +- **GGUF Support**: Multiple quantized formats available (F16, Q3_K_M, Q4_K_M, Q5_K_M, Q6_K) +- **llama.cpp Compatible**: Ready for edge deployment and local inference +- **BF16 Precision**: Trained with bfloat16 for optimal performance +- **TGI Compatible**: Supports Text Generation Inference for production deployment +- **Chat Format Support**: Inherits Qwen2.5 chat template for conversational use + +## Training Data + +The model is trained on a composite dataset with an adaptive weighted sampling strategy: + +| Dataset | Initial Weight | Adaptive Range | Focus Area | +|---------|----------------|----------------|------------| +| MBPP | 5 | 4-6 | Basic Python programming problems with test cases | +| CodeContests | 5 | 4-6 | Competitive programming challenges | + +**Total Dataset Size:** Streaming datasets with infinite iteration +**Training Samples per Round:** 2 +**Evaluation:** Real-time via Swarm 
Coordination with an Ollama-based evaluator, otherwise the Judge API + +## Adaptive Sampling Strategy + +> "When the solvers perform well, the proposer automatically increases the difficulty to keep challenging solvers to get better over time." - CodeZero-blog + +```diff +The implementation features an adaptive sampling system that adjusts dataset weights based on performance +The system monitors performance metrics every 5 rounds and adjusts the dataset weights to maintain optimal learning balance +- Update dataset weights based on recent performance +- Calculate recent average performance for each dataset +- Adjust/use weighted sampling if adaptive, based on performance difference +- Performance better on MBPP (Mostly Basic Python Problems) +- Performance better on CodeContests +- Update dataset weights every 5 rounds & keep balanced +``` + +## Adaptive Reward System +### Quality Enhanced Implementation + + +> "Rewards are derived from multiple lightweight checks, ranging from code validity and formatting to alignment with the problem statement, combined into a single interpretable score." 
- CodeZero-blog + +```diff +The reward system includes a quality data enhanced mechanism that evaluates code structure and documentation +- Calculate quality data enhanced for well-structured code +- Documentation enhanced +- Structure enhanced +- Algorithmic efficiency (simple heuristic) +- Scale with base reward to avoid inflation +``` + +### Adaptive Threshold System + + +```diff +The system also includes an adaptive threshold mechanism that adjusts based on recent performance +- Function adaptive threshold based on recent performance +- Performance quality data is consistently high +``` + +## Quick Performance Simulation +### Reward Comparison + +Based on our simulation with 1000 samples, the adaptive reward system shows significant improvement + +| System | MBPP Avg Reward | CodeContests Avg Reward | Overall Avg Reward | Improvement | +|---------|----------------|------------------------|-------------------|-------------| +| Original | 0.234 | -0.156 | 0.039 | - | +| Adaptive | 0.312 | -0.098 | 0.107 | ~174% | + +### Training Progress + +Based on the logs provided, the model shows consistent progress: + +Metric data visualize train/loss by Weights & Biases (WanDB) +- Soon LIVE! + +``` +[2025-11-14 04:22:50,632][genrl.logging_utils.global_defs][INFO] - __ Joining round: 13053 +[2025-11-14 04:23:50,633][genrl.logging_utils.global_defs][INFO] - Starting round: 13053/100000. +Map: 100%|______________________________________| 1/1 [00:00<00:00, 158.65 examples/s] +Map: 100%|______________________________________| 1/1 [00:00<00:00, 191.92 examples/s] +[2025-11-14 04:25:12,646][genrl.logging_utils.global_defs][INFO] - pushing model to huggingface +Processing Files (1 / 1) : 100%|___| 988MB / 988MB, 94.3MB/s +New Data Upload : 100%|___| 983MB / 983MB, 94.3MB/s +.....kpb5lid/model.safetensors: 100%|___| 988MB / 988MB, 94.3MB/s +[2025-11-14 04:27:01,877][genrl.logging_utils.global_defs][INFO] - Already finished round: 13053. Next check in 160.0s. 
+``` + +## Quick Start Inferences + +### Standard Transformers + +```python +from transformers import AutoModelForCausalLM, AutoTokenizer +model = AutoModelForCausalLM.from_pretrained( + "0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther", + torch_dtype="auto", + device_map="auto" +) +tokenizer = AutoTokenizer.from_pretrained("0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther") +prompt = "Write a function to calculate the factorial of a number." +inputs = tokenizer(prompt, return_tensors="pt").to(model.device) +outputs = model.generate(**inputs, max_length=256, temperature=0.7, top_p=0.8) +print(tokenizer.decode(outputs[0], skip_special_tokens=True)) +``` + +### Chat Format (Conversational) + +```python +from transformers import AutoModelForCausalLM, AutoTokenizer +model = AutoModelForCausalLM.from_pretrained("0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther") +tokenizer = AutoTokenizer.from_pretrained("0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther") +messages = [ + {"role": "system", "content": "You are an expert Python programmer."}, + {"role": "user", "content": "Write a function to check if a string is a palindrome."} +] +text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) +inputs = tokenizer(text, return_tensors="pt") +outputs = model.generate(**inputs, max_length=512) +print(tokenizer.decode(outputs[0])) +``` + +### Text Generation Inference (TGI) + +```bash +docker run -d --gpus all \ + -p 8080:80 \ + -v $PWD/data:/data \ + ghcr.io/huggingface/text-generation-inference:latest \ + --model-id 0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther \ + --max-input-length 4096 \ + --max-total-tokens 8192 +``` + +### GGUF with LLAMA.CPP + +```bash +# Download quantized model (recommended: Q4_K_M) +wget https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther/resolve/main/Qwen2.5-Coder-0.5B-Q4_K_M.gguf +# Run inference 
+./llama-cli -m Qwen2.5-Coder-0.5B-Q4_K_M.gguf \ + -p "Write a function to implement binary search in Python." \ + --temp 0.7 --top-p 0.8 +``` + +### Ollama + +```bash +# Create Modelfile +cat > Modelfile << 'EOF' +FROM ./0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther/Qwen2.5-Coder-0.5B-Q4_K_M.gguf +PARAMETER temperature 0.7 +PARAMETER top_p 0.8 +PARAMETER top_k 20 +SYSTEM "You are an expert Python programmer who writes clean, documented code." +EOF +# Create and run +ollama create qwen2.5-coder-swarm -f Modelfile +ollama run qwen2.5-coder-swarm "Write a function to calculate the factorial of a number." +``` + +## Available GGUF Quantization + +| Format | Size | Precision | Use Case | Download | +|--------|------|-----------|----------|----------| +| Safetensors (BF16) | 988 MB | BF16 | Full precision training/fine-tuning | `model.safetensors` | +| GGUF F16 | 994 MB | FP16 | High quality inference | `Qwen2.5-Coder-0.5B-F16.gguf` | +| GGUF Q6_K | 506 MB | 6-bit | High quality compression | `Qwen2.5-Coder-0.5B-Q6_K.gguf` | +| GGUF Q5_K_M | 420 MB | 5-bit | Balanced quality/size | `Qwen2.5-Coder-0.5B-Q5_K_M.gguf` | +| GGUF Q4_K_M | 398 MB | 4-bit | **Recommended** for production | `Qwen2.5-Coder-0.5B-Q4_K_M.gguf` | +| GGUF Q3_K_M | 355 MB | 3-bit | Smallest, fastest | `Qwen2.5-Coder-0.5B-Q3_K_M.gguf` | + +> All GGUF formats are **llama.cpp compatible**, ready to use for **chat inference**, and auto-updated hourly. + + +## Chat Format & Conversational + +This model inherits **Qwen2.5's chat template** for structured conversations. 
+ +### Format Structure + +``` +<|im_start|>system +{system_message} +<|im_end|> +<|im_start|>user +{user_message} +<|im_end|> +<|im_start|>assistant +{assistant_response} +<|im_end|> +``` + +### Chat Template Features + +- **System Instructions**: Guide model behavior with system messages +- **Multi-turn Dialogue**: Maintains conversation context +- **Tool Calling**: Support function calling (if enabled in training) +- **Code Generation**: Optimized for generating Python code + +**Note**: While model supports chat format structurally, optimal conversational performance depends on whether training data included formatted dialogues. Current training focuses on **programming challenges**. + +### Gensyn RL-Swarm Quick-Architecture + +```diff +Training Framework: +- Method: GRPO (Group Relative Policy Optimization) +- Base Model: Qwen/Qwen2.5-Coder-0.5B-Instruct +- Training Regime: bfloat16 mixed precision +- Max Rounds: 100000 +- Update Frequency: Every 5-10 minutes +- Generations per Round: 2 +- Batch size: Combine +- Tree-based Model: 2 tree +- Seed: 42 +Blockchain Integration: +- Network: Gensyn Testnet +- Chain ID: 685685 +- Contract: SwarmCoordinator v0.4.2 +Swarm Communication: +- Framework: Hivemind P2P Backend +- Initial Peers: 3 bootnodes +- Beam Size: 10 +Reward System: +- Manager: RewardManager (SwarmGameManager/CodeGenerationRewards) +- Reward Function: Adaptive with quality enhanced +- Evaluator: Ollama (qwen2.5-coder:1.5b-instruct) +- Judge API: https://codezero-judge.gensyn.ai +``` + +## Model Capabilities + +This model excels at: + +1. **Basic Python Programming**: Functions, loops, conditionals, data structures +2. **Algorithm Implementation**: Sorting, searching, graph algorithms +3. **String Manipulation**: Pattern matching, parsing, formatting +4. **Mathematical Functions**: Calculations, conversions, formulas +5. **Code Documentation**: Writing clear, commented functions +6. 
**Problem Solving**: Breaking down complex problems into manageable steps + +## Limitations + +- **Specialized Domain**: Optimized for programming challenges; may underperform on creative writing +- **Training in Progress**: Weights update every 5-10 minutes; performance varies +- **Scale**: 0.5B parameters - suitable for edge but not SOTA for complex programming +- **Experimental**: Decentralized RL training; behavior less predictable than supervised models +- **Context**: Best performance within 4K tokens (full 32K supported) + +## Update Schedule + +| Format | Frequency | Trigger | +|--------|-----------|---------| +| Safetensors (BF16) | Every 5-10 min | Automatic via RL-Swarm | +| GGUF (all formats) | Every 3 hours | Auto-conversion pipeline | + +**Auto-Conversion Pipeline:** + +1. Monitors repo for new training commits +2. Downloads latest `model.safetensors` +3. Converts to F16 GGUF base +4. Quantizes to Q3_K_M, Q4_K_M, Q5_K_M, Q6_K +5. Standard formats + +Check commit history for exact timestamps. + +### Architecture Components + +1. **Game Manager**: Orchestrates training rounds and swarm coordination +2. **Trainer**: GRPO implementation for policy optimization +3. **Data Manager**: Dataset loading with adaptive weighted sampling +4. **Reward Manager**: Computes rewards via Ollama evaluator with quality enhanced +5. **Coordinator**: Blockchain integration for swarm state +6. **P2P Backend**: Hivemind DHT for model sharing + +### Training Process + +``` +1. Agent joins swarm via P2P network +2. Coordinator assigns round via smart contract +3. Agent samples data from adaptive weighted datasets +4. Model generates 2 responses +5. Ollama evaluator assesses and assigns rewards with quality enhanced +6. GRPO updates policy based on rewards +7. Updated model shared via DHT +8. Best checkpoint saved to HuggingFace +9. 
Repeat +``` + +### Decentralization Benefits + +- **Fault Tolerance**: Multiple agents; no single point of failure +- **Diverse Exploration**: Different agents explore different strategies +- **Collective Intelligence**: Agents learn from each other +- **Transparent**: All rounds verified on-chain + +### Software Stack + +- **Framework**: Gensyn RL-Swarm v0.7.0 +- **Library**: transformers v4.57.1 +- **P2P**: hivemind +- **Blockchain**: Gensyn testnet +- **Config**: Hydra + OmegaConf +- **Logging**: WandB integration + +### Hardware Requirements + +**Training GPU:** +- GPU: NVIDIA 4090 24GB+ (BF16 training) +- RAM: 16GB+ +- Cores: 10+ +- Storage: 50GB SSD +- Network: High bandwidth for P2P + +**Training (CPU-optimized):** +- CPU: INTEL or AMD +- Cores: 10+ +- RAM: 16GB+ +- Storage: 50GB SSD +- Network: High bandwidth for P2P + +**Inference:** +- Safetensors: 8GB VRAM (GPU) / 16GB RAM (CPU) +- GGUF Q4_K_M: 2GB VRAM (GPU) / 4GB RAM (CPU) +- GGUF Q3_K_M: 3GB RAM (CPU-only) + +### Training Progress Metrics + +| Metric | Value | Target | +|--------|-------|--------| +| Completed Rounds | 13,533+ | 100,000 | +| Training Progress | 13.53% | 100% | +| Update Frequency | 5-10 min | Continuous | + +**Note**: **average\@k:** Average performance across `k` attempts, measuring consistency. **pass\@k:** Probability of at least one correct solution in `k` attempts, measuring capability. Current metrics track training rounds completed in the decentralized swarm. 
+ +### Adaptive Reward Performance + +Our adaptive reward system has shown approximately ~174% improvement in reward scores compared to the baseline system: + +``` +Original: + Overall Avg Reward: 0.039 + MBPP Avg Reward: 0.234 + CodeContests Avg Reward: -0.156 +Adaptive: + Overall Avg Reward: 0.107 + MBPP Avg Reward: 0.312 + CodeContests Avg Reward: -0.098 +Improvement: 0.068 (~174% increase) +``` + +## Citation + +``` +@misc{qwen2.5-coder-gensyn-swarm-2025, + author = {0xgrey}, + title = {Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm: Continuous RL Training on Distributed Swarm with Adaptive Rewards}, + year = {2025}, + publisher = {HuggingFace}, + howpublished = {\url{https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther}}, + note = {Agent ID: tall\_tame\_panther} +} +@misc{gensyn-rl-swarm-2025, + title = {Gensyn RL-Swarm: Decentralized Reinforcement Learning Framework}, + author = {Gensyn AI}, + year = {2025}, + url = {https://gensyn.ai} +} +@misc{codezero-2025, + title = {CodeZero: A Collaborative Coding Environment for Distributed RL}, + author = {Gensyn AI}, + year = {2025}, + url = {https://docs.gensyn.ai/testnet/rl-swarm/how-it-works/codezero} +} +``` + +## References + +- **Gensyn Documentation**: https://docs.gensyn.ai/ +- **Gensyn GitHub**: https://github.com/gensyn-ai +- **RL-Swarm Contracts**: https://github.com/gensyn-ai/rl-swarm-contracts +- **Qwen2.5-Coder Model Card**: https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct +- **MBPP Dataset**: https://huggingface.co/datasets/google-research-datasets/mbpp +- **CodeContests Dataset**: https://huggingface.co/datasets/deepmind/code_contests +- **arXiv:1910.09700**: ML Carbon Emissions methodology + + +## Contact + +- **Developer**: 0xgrey +- **Agent ID**: tall_tame_panther +- **Community**: [Gensyn Discord](https://discord.gg/gensyn) + + +**⚠️ Important**: This is a continuously trained model. 
For reproducibility, specify commit hash: + +``` +git clone https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther +cd Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther +git checkout <commit-hash> +``` + +--- + +
+ +**Trained with 🩷 using Gensyn RL-Swarm** + +[![Gensyn](https://img.shields.io/badge/Powered%20by-Gensyn%20AI-pink?style=for-the-badge)](https://gensyn.ai) + +
\ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..a54efe6 --- /dev/null +++ b/config.json @@ -0,0 +1,54 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 24, + "model_type": "qwen2", + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "4.57.1", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..fc71a15 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.05, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "4.57.1" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..49e1e63 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid 
sha256:eefaa4c897b452f74844a82af0ba36f9f86e555bb67fa9062238340bf05a0369 +size 988097824