初始化项目,由ModelHub XC社区提供模型
Model: 0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther Source: Original Platform
This commit is contained in:
40
.gitattributes
vendored
Normal file
40
.gitattributes
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
*.7z filter=lfs diff=lfs merge=lfs -text
|
||||
*.arrow filter=lfs diff=lfs merge=lfs -text
|
||||
*.bin filter=lfs diff=lfs merge=lfs -text
|
||||
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
||||
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
||||
*.ftz filter=lfs diff=lfs merge=lfs -text
|
||||
*.gz filter=lfs diff=lfs merge=lfs -text
|
||||
*.h5 filter=lfs diff=lfs merge=lfs -text
|
||||
*.joblib filter=lfs diff=lfs merge=lfs -text
|
||||
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
||||
*.model filter=lfs diff=lfs merge=lfs -text
|
||||
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
||||
*.npy filter=lfs diff=lfs merge=lfs -text
|
||||
*.npz filter=lfs diff=lfs merge=lfs -text
|
||||
*.onnx filter=lfs diff=lfs merge=lfs -text
|
||||
*.ot filter=lfs diff=lfs merge=lfs -text
|
||||
*.parquet filter=lfs diff=lfs merge=lfs -text
|
||||
*.pb filter=lfs diff=lfs merge=lfs -text
|
||||
*.pickle filter=lfs diff=lfs merge=lfs -text
|
||||
*.pkl filter=lfs diff=lfs merge=lfs -text
|
||||
*.pt filter=lfs diff=lfs merge=lfs -text
|
||||
*.pth filter=lfs diff=lfs merge=lfs -text
|
||||
*.rar filter=lfs diff=lfs merge=lfs -text
|
||||
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
||||
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
||||
*.tar filter=lfs diff=lfs merge=lfs -text
|
||||
*.tflite filter=lfs diff=lfs merge=lfs -text
|
||||
*.tgz filter=lfs diff=lfs merge=lfs -text
|
||||
*.wasm filter=lfs diff=lfs merge=lfs -text
|
||||
*.xz filter=lfs diff=lfs merge=lfs -text
|
||||
*.zip filter=lfs diff=lfs merge=lfs -text
|
||||
*.zst filter=lfs diff=lfs merge=lfs -text
|
||||
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2.5-Coder-0.5B-F16.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2.5-Coder-0.5B-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2.5-Coder-0.5B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2.5-Coder-0.5B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
Qwen2.5-Coder-0.5B-Q6_K.gguf filter=lfs diff=lfs merge=lfs -text
|
||||
3
Qwen2.5-Coder-0.5B-F16.gguf
Normal file
3
Qwen2.5-Coder-0.5B-F16.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:cf690960971be6647cb3de4ae2c1f7dfbf4668dbeb345f07cf07b324480751da
|
||||
size 994156224
|
||||
3
Qwen2.5-Coder-0.5B-Q3_K_M.gguf
Normal file
3
Qwen2.5-Coder-0.5B-Q3_K_M.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:5aac54b1e163b174cc1a53dc3d3b51429f801acb5ba53071c95f1551ae73f56d
|
||||
size 355465920
|
||||
3
Qwen2.5-Coder-0.5B-Q4_K_M.gguf
Normal file
3
Qwen2.5-Coder-0.5B-Q4_K_M.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:74ac0b4b3a041d80c2780f95d762ea24b88f7e0d070af5a0943dabbbc98b5b31
|
||||
size 397807296
|
||||
3
Qwen2.5-Coder-0.5B-Q5_K_M.gguf
Normal file
3
Qwen2.5-Coder-0.5B-Q5_K_M.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:68d012074b7f9077e8d80f84155c72bee28a018a6357d6c7418d655b738649ae
|
||||
size 420085440
|
||||
3
Qwen2.5-Coder-0.5B-Q6_K.gguf
Normal file
3
Qwen2.5-Coder-0.5B-Q6_K.gguf
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:40b7d9323010f2e4293601a3da433b0644337dc7cf82974cef437db86aecdd23
|
||||
size 505735872
|
||||
478
README.md
Normal file
478
README.md
Normal file
@@ -0,0 +1,478 @@
|
||||
---
|
||||
library_name: transformers
|
||||
tags:
|
||||
- text-generation
|
||||
- qwen2.5-coder
|
||||
- rl-swarm
|
||||
- genrl-swarm
|
||||
- grpo
|
||||
- gensyn
|
||||
- trl
|
||||
- code-generation
|
||||
- programming
|
||||
- continuous-training
|
||||
- reinforcement-learning
|
||||
- safetensors
|
||||
- gguf
|
||||
- math
|
||||
- logic
|
||||
- conversational
|
||||
- text-generation-inference
|
||||
- I am tall_tame_panther
|
||||
- python
|
||||
- agent
|
||||
license: mit
|
||||
language:
|
||||
- en
|
||||
base_model:
|
||||
- Qwen/Qwen2.5-Coder-0.5B-Instruct
|
||||
---
|
||||
|
||||
<h1 align="center">Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm Agent-ID (tall_tame_panther)</h1>
|
||||
|
||||
<h2 align="center">Gensyn RL-Swarm: Training & GGUF Quantized LLMs for Inference</h2>
|
||||
|
||||
<p align="center">
|
||||
<a href="https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther"><img src="https://img.shields.io/badge/🤗%20Hugging%20Face-Model-blue" alt="Model"></a>
|
||||
<a href="https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther/tree/main"><img src="https://img.shields.io/badge/GGUF-Available-8A2BE2" alt="GGUF"></a>
|
||||
<img src="https://img.shields.io/badge/LLama.cpp-Compatible-orange" alt="llama.cpp">
|
||||
<a href="https://gensyn.ai"><img src="https://img.shields.io/badge/Trained%20with-Gensyn%20RL--Swarm-pink" alt="Gensyn"></a>
|
||||
<a href="https://github.com/gensyn-ai/rl-swarm/releases"><img src="https://img.shields.io/github/v/release/gensyn-ai/rl-swarm?label=Version&color=FF0069" alt="version"></a>
|
||||
<a href="https://github.com/gensyn-ai/rl-swarm/blob/main/LICENSE.TXT"><img src="https://img.shields.io/badge/License-MIT-green" alt="License"></a>
|
||||
</p>
|
||||
|
||||
<div align="center">
|
||||
|
||||
[Gensyn](https://gensyn.ai)
|
||||
|
||||
</div>
|
||||
|
||||
---
|
||||
|
||||
## Model Overview
|
||||
|
||||
This model is published in an **experimental (advanced) mode**: it is a continuously trained `Qwen2.5-Coder-0.5B-Instruct` checkpoint, fine-tuned with the **Gensyn RL-Swarm** framework using **GRPO (Group Relative Policy Optimization)** and also provided in **GGUF (llama.cpp)** format, for enhanced code generation capabilities. **Note: Current training focuses on programming challenges with adaptive weighted sampling**.
|
||||
|
||||
- **Agent ID:** `tall_tame_panther`
|
||||
- **Training Status:** 🟢 LIVE - Model updates automatically every 5-10 minutes
|
||||
- **Auto-Sync GGUF Pipeline Status:** 🟢 LIVE - Commits update automatically every hour
|
||||
- **Current Progress:** Round 13,533+ / 100,000 (13.53%)
|
||||
- **Framework Version:** Gensyn RL-Swarm v0.7.0
|
||||
- **Contract:** SwarmCoordinator v0.4.2
|
||||
|
||||
## Key Features
|
||||
|
||||
- **Real-time Training**: Continuous learning with distributed RL across Gensyn swarm network
|
||||
- **Adaptive System**: Dynamic quality enhancement and dataset weighting for optimal learning
|
||||
- **Multi-domain Coding**: Trained on MBPP and CodeContests datasets with adaptive sampling
|
||||
- **GGUF Support**: Multiple quantized formats available (F16, Q3_K_M, Q4_K_M, Q5_K_M, Q6_K)
|
||||
- **llama.cpp Compatible**: Ready for edge deployment and local inference
|
||||
- **BF16 Precision**: Trained with bfloat16 for optimal performance
|
||||
- **TGI Compatible**: Supports Text Generation Inference for production deployment
|
||||
- **Chat Format Support**: Inherits Qwen2.5 chat template for conversational use
|
||||
|
||||
## Training Data
|
||||
|
||||
The model is trained on a composite dataset with adaptive weighted sampling strategy:
|
||||
|
||||
| Dataset | Initial Weight | Adaptive Range | Focus Area |
|
||||
|---------|----------------|----------------|------------|
|
||||
| MBPP | 5 | 4-6 | Basic Python programming problems with test cases |
|
||||
| CodeContests | 5 | 4-6 | Competitive programming challenges |
|
||||
|
||||
**Total Dataset Size:** Streaming datasets with infinite iteration
|
||||
**Training Samples per Round:** 2
|
||||
**Evaluation:** Real-time via Swarm Coordination, using an Ollama-based evaluator or, otherwise, the Judge API
|
||||
|
||||
## Adaptive Sampling Strategy
|
||||
|
||||
> "When the solvers perform well, the proposer automatically increases the difficulty to keep challenging solvers to get better over time." - CodeZero-blog
|
||||
|
||||
```diff
|
||||
The implementation features an adaptive sampling system that adjusts dataset weights based on performance
|
||||
The system monitors performance metrics every 5 rounds and adjusts the dataset weights to maintain optimal learning balance
|
||||
- Update dataset weights based on recent performance
|
||||
- Calculate recent average performance for each dataset
|
||||
- Adjust/use weighted sampling if adaptive, based on performance difference
|
||||
- Performance better on MBPP (Mostly Basic Python Problems)
|
||||
- Performance better on CodeContests
|
||||
- Update dataset weights every 5 rounds & keep balanced
|
||||
```
|
||||
|
||||
## Adaptive Reward System
|
||||
### Quality Enhanced Implementation
|
||||
|
||||
|
||||
> "Rewards are derived from multiple lightweight checks, ranging from code validity and formatting to alignment with the problem statement, combined into a single interpretable score." - CodeZero-blog
|
||||
|
||||
```diff
|
||||
The reward system includes a quality enhancement mechanism that evaluates code structure and documentation
|
||||
- Calculate quality data enhanced for well-structured code
|
||||
- Documentation enhanced
|
||||
- Structure enhanced
|
||||
- Algorithmic efficiency (simple heuristic)
|
||||
- Scale with base reward to avoid inflation
|
||||
```
|
||||
|
||||
### Adaptive Threshold System
|
||||
|
||||
|
||||
```diff
|
||||
The system also includes an adaptive threshold mechanism that adjusts based on recent performance
|
||||
- Function adaptive threshold based on recent performance
|
||||
- Performance quality data is consistently high
|
||||
```
|
||||
|
||||
## Quick Performance Simulation
|
||||
### Reward Comparison
|
||||
|
||||
Based on our simulation with 1000 samples, the adaptive reward system shows significant improvement
|
||||
|
||||
| System | MBPP Avg Reward | CodeContests Avg Reward | Overall Avg Reward | Improvement |
|
||||
|---------|----------------|------------------------|-------------------|-------------|
|
||||
| Original | 0.234 | -0.156 | 0.039 | - |
|
||||
| Adaptive | 0.312 | -0.098 | 0.107 | ~174% |
|
||||
|
||||
### Training Progress
|
||||
|
||||
Based on the logs provided, the model shows consistent progress:
|
||||
|
||||
Train/loss metrics visualized with Weights & Biases (WandB)
|
||||
- Soon LIVE!
|
||||
|
||||
```
|
||||
[2025-11-14 04:22:50,632][genrl.logging_utils.global_defs][INFO] - __ Joining round: 13053
|
||||
[2025-11-14 04:23:50,633][genrl.logging_utils.global_defs][INFO] - Starting round: 13053/100000.
|
||||
Map: 100%|______________________________________| 1/1 [00:00<00:00, 158.65 examples/s]
|
||||
Map: 100%|______________________________________| 1/1 [00:00<00:00, 191.92 examples/s]
|
||||
[2025-11-14 04:25:12,646][genrl.logging_utils.global_defs][INFO] - pushing model to huggingface
|
||||
Processing Files (1 / 1) : 100%|___| 988MB / 988MB, 94.3MB/s
|
||||
New Data Upload : 100%|___| 983MB / 983MB, 94.3MB/s
|
||||
.....kpb5lid/model.safetensors: 100%|___| 988MB / 988MB, 94.3MB/s
|
||||
[2025-11-14 04:27:01,877][genrl.logging_utils.global_defs][INFO] - Already finished round: 13053. Next check in 160.0s.
|
||||
```
|
||||
|
||||
## Quick Start Inferences
|
||||
|
||||
### Standard Transformers
|
||||
|
||||
```python
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
"0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther",
|
||||
torch_dtype="auto",
|
||||
device_map="auto"
|
||||
)
|
||||
tokenizer = AutoTokenizer.from_pretrained("0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther")
|
||||
prompt = "Write a function to calculate the factorial of a number."
|
||||
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
|
||||
outputs = model.generate(**inputs, max_length=256, temperature=0.7, top_p=0.8)
|
||||
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
||||
```
|
||||
|
||||
### Chat Format (Conversational)
|
||||
|
||||
```python
|
||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||
model = AutoModelForCausalLM.from_pretrained("0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther")
|
||||
tokenizer = AutoTokenizer.from_pretrained("0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther")
|
||||
messages = [
|
||||
{"role": "system", "content": "You are an expert Python programmer."},
|
||||
{"role": "user", "content": "Write a function to check if a string is a palindrome."}
|
||||
]
|
||||
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
|
||||
inputs = tokenizer(text, return_tensors="pt")
|
||||
outputs = model.generate(**inputs, max_length=512)
|
||||
print(tokenizer.decode(outputs[0]))
|
||||
```
|
||||
|
||||
### Text Generation Inference (TGI)
|
||||
|
||||
```bash
|
||||
docker run -d --gpus all \
|
||||
-p 8080:80 \
|
||||
-v $PWD/data:/data \
|
||||
ghcr.io/huggingface/text-generation-inference:latest \
|
||||
--model-id 0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther \
|
||||
--max-input-length 4096 \
|
||||
--max-total-tokens 8192
|
||||
```
|
||||
|
||||
### GGUF with LLAMA.CPP
|
||||
|
||||
```bash
|
||||
# Download quantized model (recommended: Q4_K_M)
|
||||
wget https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther/resolve/main/Qwen2.5-Coder-0.5B-Q4_K_M.gguf
|
||||
# Run inference
|
||||
./llama-cli -m Qwen2.5-Coder-0.5B-Q4_K_M.gguf \
|
||||
-p "Write a function to implement binary search in Python." \
|
||||
--temp 0.7 --top-p 0.8
|
||||
```
|
||||
|
||||
### Ollama
|
||||
|
||||
```bash
|
||||
# Create Modelfile
|
||||
cat > Modelfile << 'EOF'
|
||||
FROM ./0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther/Qwen2.5-Coder-0.5B-Q4_K_M.gguf
|
||||
PARAMETER temperature 0.7
|
||||
PARAMETER top_p 0.8
|
||||
PARAMETER top_k 20
|
||||
SYSTEM "You are an expert Python programmer who writes clean, documented code."
|
||||
EOF
|
||||
# Create and run
|
||||
ollama create qwen2.5-coder-swarm -f Modelfile
|
||||
ollama run qwen2.5-coder-swarm "Write a function to calculate the factorial of a number."
|
||||
```
|
||||
|
||||
## Available GGUF Quantization
|
||||
|
||||
| Format | Size | Precision | Use Case | Download |
|
||||
|--------|------|-----------|----------|----------|
|
||||
| Safetensors (BF16) | 988 MB | BF16 | Full precision training/fine-tuning | `model.safetensors` |
|
||||
| GGUF F16 | 994 MB | FP16 | High quality inference | `Qwen2.5-Coder-0.5B-F16.gguf` |
|
||||
| GGUF Q6_K | 506 MB | 6-bit | High quality compression | `Qwen2.5-Coder-0.5B-Q6_K.gguf` |
|
||||
| GGUF Q5_K_M | 420 MB | 5-bit | Balanced quality/size | `Qwen2.5-Coder-0.5B-Q5_K_M.gguf` |
|
||||
| GGUF Q4_K_M | 398 MB | 4-bit | **Recommended** for production | `Qwen2.5-Coder-0.5B-Q4_K_M.gguf` |
|
||||
| GGUF Q3_K_M | 355 MB | 3-bit | Smallest, fastest | `Qwen2.5-Coder-0.5B-Q3_K_M.gguf` |
|
||||
|
||||
> All GGUF formats are **llama.cpp compatible**, ready to use for **chat inference**, and auto-updated hourly.
|
||||
|
||||
|
||||
## Chat Format & Conversational
|
||||
|
||||
This model inherits **Qwen2.5's chat template** for structured conversations.
|
||||
|
||||
### Format Structure
|
||||
|
||||
```
|
||||
<|im_start|>system
|
||||
{system_message}
|
||||
<|im_end|>
|
||||
<|im_start|>user
|
||||
{user_message}
|
||||
<|im_end|>
|
||||
<|im_start|>assistant
|
||||
{assistant_response}
|
||||
<|im_end|>
|
||||
```
|
||||
|
||||
### Chat Template Features
|
||||
|
||||
- **System Instructions**: Guide model behavior with system messages
|
||||
- **Multi-turn Dialogue**: Maintains conversation context
|
||||
- **Tool Calling**: Supports function calling (if enabled in training)
|
||||
- **Code Generation**: Optimized for generating Python code
|
||||
|
||||
**Note**: While model supports chat format structurally, optimal conversational performance depends on whether training data included formatted dialogues. Current training focuses on **programming challenges**.
|
||||
|
||||
### Gensyn RL-Swarm Quick-Architecture
|
||||
|
||||
```diff
|
||||
Training Framework:
|
||||
- Method: GRPO (Group Relative Policy Optimization)
|
||||
- Base Model: Qwen/Qwen2.5-Coder-0.5B-Instruct
|
||||
- Training Regime: bfloat16 mixed precision
|
||||
- Max Rounds: 100000
|
||||
- Update Frequency: Every 5-10 minutes
|
||||
- Generations per Round: 2
|
||||
- Batch size: Combine
|
||||
- Tree-based Model: 2 tree
|
||||
- Seed: 42
|
||||
Blockchain Integration:
|
||||
- Network: Gensyn Testnet
|
||||
- Chain ID: 685685
|
||||
- Contract: SwarmCoordinator v0.4.2
|
||||
Swarm Communication:
|
||||
- Framework: Hivemind P2P Backend
|
||||
- Initial Peers: 3 bootnodes
|
||||
- Beam Size: 10
|
||||
Reward System:
|
||||
- Manager: RewardManager (SwarmGameManager/CodeGenerationRewards)
|
||||
- Reward Function: Adaptive with quality enhancement
|
||||
- Evaluator: Ollama (qwen2.5-coder:1.5b-instruct)
|
||||
- Judge API: https://codezero-judge.gensyn.ai
|
||||
```
|
||||
|
||||
## Model Capabilities
|
||||
|
||||
This model excels at:
|
||||
|
||||
1. **Basic Python Programming**: Functions, loops, conditionals, data structures
|
||||
2. **Algorithm Implementation**: Sorting, searching, graph algorithms
|
||||
3. **String Manipulation**: Pattern matching, parsing, formatting
|
||||
4. **Mathematical Functions**: Calculations, conversions, formulas
|
||||
5. **Code Documentation**: Writing clear, commented functions
|
||||
6. **Problem Solving**: Breaking down complex problems into manageable steps
|
||||
|
||||
## Limitations
|
||||
|
||||
- **Specialized Domain**: Optimized for programming challenges; may underperform on creative writing
|
||||
- **Training in Progress**: Weights update every 5-10 minutes; performance varies
|
||||
- **Scale**: 0.5B parameters - suitable for edge but not SOTA for complex programming
|
||||
- **Experimental**: Decentralized RL training; behavior less predictable than supervised models
|
||||
- **Context**: Best performance within 4K tokens (full 32K supported)
|
||||
|
||||
## Update Schedule
|
||||
|
||||
| Format | Frequency | Trigger |
|
||||
|--------|-----------|---------|
|
||||
| Safetensors (BF16) | Every 5-10 min | Automatic via RL-Swarm |
|
||||
| GGUF (all formats) | Every 3 hours | Auto-conversion pipeline |
|
||||
|
||||
**Auto-Conversion Pipeline:**
|
||||
|
||||
1. Monitors repo for new training commits
|
||||
2. Downloads latest `model.safetensors`
|
||||
3. Converts to F16 GGUF base
|
||||
4. Quantizes to Q3_K_M, Q4_K_M, Q5_K_M, Q6_K
|
||||
5. Standardizes output formats
|
||||
|
||||
Check commit history for exact timestamps.
|
||||
|
||||
### Architecture Components
|
||||
|
||||
1. **Game Manager**: Orchestrates training rounds and swarm coordination
|
||||
2. **Trainer**: GRPO implementation for policy optimization
|
||||
3. **Data Manager**: Dataset loading with adaptive weighted sampling
|
||||
4. **Reward Manager**: Computes rewards via Ollama evaluator with quality enhancement
|
||||
5. **Coordinator**: Blockchain integration for swarm state
|
||||
6. **P2P Backend**: Hivemind DHT for model sharing
|
||||
|
||||
### Training Process
|
||||
|
||||
```
|
||||
1. Agent joins swarm via P2P network
|
||||
2. Coordinator assigns round via smart contract
|
||||
3. Agent samples data from adaptive weighted datasets
|
||||
4. Model generates 2 responses
|
||||
5. Ollama evaluator assesses and assigns rewards with quality enhancement
|
||||
6. GRPO updates policy based on rewards
|
||||
7. Updated model shared via DHT
|
||||
8. Best checkpoint saved to HuggingFace
|
||||
9. Repeat
|
||||
```
|
||||
|
||||
### Decentralization Benefits
|
||||
|
||||
- **Fault Tolerance**: Multiple agents; no single point of failure
|
||||
- **Diverse Exploration**: Different agents explore different strategies
|
||||
- **Collective Intelligence**: Agents learn from each other
|
||||
- **Transparent**: All rounds verified on-chain
|
||||
|
||||
### Software Stack
|
||||
|
||||
- **Framework**: Gensyn RL-Swarm v0.7.0
|
||||
- **Library**: transformers v4.57.1
|
||||
- **P2P**: hivemind
|
||||
- **Blockchain**: Gensyn testnet
|
||||
- **Config**: Hydra + OmegaConf
|
||||
- **Logging**: WandB integration
|
||||
|
||||
### Hardware Requirements
|
||||
|
||||
**Training GPU:**
|
||||
- GPU: NVIDIA 4090 24GB+ (BF16 training)
|
||||
- RAM: 16GB+
|
||||
- Cores: 10+
|
||||
- Storage: 50GB SSD
|
||||
- Network: High bandwidth for P2P
|
||||
|
||||
**Training (CPU-optimized):**
|
||||
- CPU: INTEL or AMD
|
||||
- Cores: 10+
|
||||
- RAM: 16GB+
|
||||
- Storage: 50GB SSD
|
||||
- Network: High bandwidth for P2P
|
||||
|
||||
**Inference:**
|
||||
- Safetensors: 8GB VRAM (GPU) / 16GB RAM (CPU)
|
||||
- GGUF Q4_K_M: 2GB VRAM (GPU) / 4GB RAM (CPU)
|
||||
- GGUF Q3_K_M: 3GB RAM (CPU-only)
|
||||
|
||||
### Training Progress Metrics
|
||||
|
||||
| Metric | Value | Target |
|
||||
|--------|-------|--------|
|
||||
| Completed Rounds | 13,533+ | 100,000 |
|
||||
| Training Progress | 13.53% | 100% |
|
||||
| Update Frequency | 5-10 min | Continuous |
|
||||
|
||||
**Note**: **average\@k:** Average performance across `k` attempts, measuring consistency. **pass\@k:** Probability of at least one correct solution in `k` attempts, measuring capability. Current metrics track training rounds completed in decentralized swarm.
|
||||
|
||||
### Adaptive Reward Performance
|
||||
|
||||
Our adaptive reward system has shown an approximately 174% improvement in reward scores compared to the baseline system:
|
||||
|
||||
```
|
||||
Original:
|
||||
Overall Avg Reward: 0.039
|
||||
MBPP Avg Reward: 0.234
|
||||
CodeContests Avg Reward: -0.156
|
||||
Adaptive:
|
||||
Overall Avg Reward: 0.107
|
||||
MBPP Avg Reward: 0.312
|
||||
CodeContests Avg Reward: -0.098
|
||||
Improvement: 0.068 (~174% increase)
|
||||
```
|
||||
|
||||
## Citation
|
||||
|
||||
```
|
||||
@misc{qwen2.5-coder-gensyn-swarm-2025,
|
||||
author = {0xgrey},
|
||||
title = {Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm: Continuous RL Training on Distributed Swarm with Adaptive Rewards},
|
||||
year = {2025},
|
||||
publisher = {HuggingFace},
|
||||
howpublished = {\url{https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther}},
|
||||
note = {Agent ID: tall\_tame\_panther}
|
||||
}
|
||||
@misc{gensyn-rl-swarm-2025,
|
||||
title = {Gensyn RL-Swarm: Decentralized Reinforcement Learning Framework},
|
||||
author = {Gensyn AI},
|
||||
year = {2025},
|
||||
url = {https://gensyn.ai}
|
||||
}
|
||||
@misc{codezero-2025,
|
||||
title = {CodeZero: A Collaborative Coding Environment for Distributed RL},
|
||||
author = {Gensyn AI},
|
||||
year = {2025},
|
||||
url = {https://docs.gensyn.ai/testnet/rl-swarm/how-it-works/codezero}
|
||||
}
|
||||
```
|
||||
|
||||
## References
|
||||
|
||||
- **Gensyn Documentation**: https://docs.gensyn.ai/
|
||||
- **Gensyn GitHub**: https://github.com/gensyn-ai
|
||||
- **RL-Swarm Contracts**: https://github.com/gensyn-ai/rl-swarm-contracts
|
||||
- **Qwen2.5-Coder Model Card**: https://huggingface.co/Qwen/Qwen2.5-Coder-0.5B-Instruct
|
||||
- **MBPP Dataset**: https://huggingface.co/datasets/google-research-datasets/mbpp
|
||||
- **CodeContests Dataset**: https://huggingface.co/datasets/deepmind/code_contests
|
||||
- **arXiv:1910.09700**: ML Carbon Emissions methodology
|
||||
|
||||
|
||||
## Contact
|
||||
|
||||
- **Developer**: 0xgrey
|
||||
- **Agent ID**: tall_tame_panther
|
||||
- **Community**: [Gensyn Discord](https://discord.gg/gensyn)
|
||||
|
||||
|
||||
**⚠️ Important**: This is a continuously trained model. For reproducibility, specify commit hash:
|
||||
|
||||
```
|
||||
git clone https://huggingface.co/0xgr3y/Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther
|
||||
cd Qwen2.5-Coder-0.5B-Instruct-Gensyn-Swarm-tall_tame_panther
|
||||
git checkout <commit-hash>
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
<div align="center">
|
||||
|
||||
**Trained with 🩷 using Gensyn RL-Swarm**
|
||||
|
||||
[Gensyn](https://gensyn.ai)
|
||||
|
||||
</div>
|
||||
54
config.json
Normal file
54
config.json
Normal file
@@ -0,0 +1,54 @@
|
||||
{
|
||||
"architectures": [
|
||||
"Qwen2ForCausalLM"
|
||||
],
|
||||
"attention_dropout": 0.0,
|
||||
"bos_token_id": 151643,
|
||||
"dtype": "bfloat16",
|
||||
"eos_token_id": 151645,
|
||||
"hidden_act": "silu",
|
||||
"hidden_size": 896,
|
||||
"initializer_range": 0.02,
|
||||
"intermediate_size": 4864,
|
||||
"layer_types": [
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention",
|
||||
"full_attention"
|
||||
],
|
||||
"max_position_embeddings": 32768,
|
||||
"max_window_layers": 24,
|
||||
"model_type": "qwen2",
|
||||
"num_attention_heads": 14,
|
||||
"num_hidden_layers": 24,
|
||||
"num_key_value_heads": 2,
|
||||
"rms_norm_eps": 1e-06,
|
||||
"rope_scaling": null,
|
||||
"rope_theta": 1000000.0,
|
||||
"sliding_window": null,
|
||||
"tie_word_embeddings": true,
|
||||
"transformers_version": "4.57.1",
|
||||
"use_cache": true,
|
||||
"use_sliding_window": false,
|
||||
"vocab_size": 151936
|
||||
}
|
||||
14
generation_config.json
Normal file
14
generation_config.json
Normal file
@@ -0,0 +1,14 @@
|
||||
{
|
||||
"bos_token_id": 151643,
|
||||
"do_sample": true,
|
||||
"eos_token_id": [
|
||||
151645,
|
||||
151643
|
||||
],
|
||||
"pad_token_id": 151643,
|
||||
"repetition_penalty": 1.05,
|
||||
"temperature": 0.7,
|
||||
"top_k": 20,
|
||||
"top_p": 0.8,
|
||||
"transformers_version": "4.57.1"
|
||||
}
|
||||
3
model.safetensors
Normal file
3
model.safetensors
Normal file
@@ -0,0 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:eefaa4c897b452f74844a82af0ba36f9f86e555bb67fa9062238340bf05a0369
|
||||
size 988097824
|
||||
Reference in New Issue
Block a user