commit bec7176c7a08c9f707d11c75c401c894bed5a2e1 Author: ModelHub XC Date: Fri Apr 10 11:05:00 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: 0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..fb4e2d8 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,43 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +Qwen3-0.6B-Gensyn-Swarm-F16.gguf filter=lfs diff=lfs merge=lfs -text 
+Qwen3-0.6B-Gensyn-Swarm-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Qwen3-0.6B-Gensyn-Swarm-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Qwen3-0.6B-Gensyn-Swarm-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Qwen3-0.6B-F16.gguf filter=lfs diff=lfs merge=lfs -text +Qwen3-0.6B-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Qwen3-0.6B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text +Qwen3-0.6B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text diff --git a/Qwen3-0.6B-F16.gguf b/Qwen3-0.6B-F16.gguf new file mode 100644 index 0000000..18fae26 --- /dev/null +++ b/Qwen3-0.6B-F16.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:781945b18abb88574af9274fe71ad35c034868eee5bcfd7ec87874b7ae0074f0 +size 1198182016 diff --git a/Qwen3-0.6B-Q3_K_M.gguf b/Qwen3-0.6B-Q3_K_M.gguf new file mode 100644 index 0000000..f8a6b13 --- /dev/null +++ b/Qwen3-0.6B-Q3_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f065c6970cefdefc61da4579e04607262bed1f4161017a018a09330b5b8b36a1 +size 347126400 diff --git a/Qwen3-0.6B-Q4_K_M.gguf b/Qwen3-0.6B-Q4_K_M.gguf new file mode 100644 index 0000000..f911283 --- /dev/null +++ b/Qwen3-0.6B-Q4_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80208a4a1629d2f64870c156281b50948cbd30ae690f5f1026b103728872ce41 +size 396704384 diff --git a/Qwen3-0.6B-Q5_K_M.gguf b/Qwen3-0.6B-Q5_K_M.gguf new file mode 100644 index 0000000..c22773f --- /dev/null +++ b/Qwen3-0.6B-Q5_K_M.gguf @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:078da2b850fc45f77a9dba7bfa117bc19e0b41bd9a7d6c4df157be0d29fbff3f +size 444414592 diff --git a/README.md b/README.md new file mode 100644 index 0000000..01f9ee4 --- /dev/null +++ b/README.md @@ -0,0 +1,480 @@ +--- +library_name: transformers +tags: +- text-generation +- qwen3 +- rl-swarm +- genrl-swarm +- grpo +- gensyn +- trl +- reasoning +- math +- logic +- continuous-training +- reinforcement-learning +- safetensors +- 
gguf +- conversational +- text-generation-inference +- I am tall_tame_panther +pipeline_tag: text-generation +license: apache-2.0 +language: +- en +base_model: Qwen/Qwen3-0.6B +datasets: +- propositional_logic +- calendar_arithmetic +- decimal_arithmetic +- base_conversion +- fraction_simplification +- basic_arithmetic +inference: true +widget: +- text: What is 15 * 23? + example_title: Basic Arithmetic +- text: Convert decimal 255 to hexadecimal. + example_title: Base Conversion +- text: Simplify the fraction 24/36. + example_title: Fraction Simplification +model-index: +- name: Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther + results: + - task: + type: text-generation + name: Mathematical Reasoning + dataset: + name: Composite Reasoning Dataset + type: custom + metrics: + - type: training_rounds + value: 43610 + name: Completed Training Rounds + - type: total_rounds + value: 100000 + name: Target Rounds + - type: progress + value: 43.61 + name: Training Progress (%) +--- + +# Qwen3-0.6B-Gensyn-Swarm the Agent-ID (tall_tame_panther) + +[![Model](https://img.shields.io/badge/🤗%20Hugging%20Face-Model-blue)](https://huggingface.co/0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther) +[![GGUF](https://img.shields.io/badge/GGUF-Available-green)](https://huggingface.co/0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther/tree/main) +[![Gensyn](https://img.shields.io/badge/Trained%20with-Gensyn%20RL--Swarm-orange)](https://gensyn.ai) +[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) + +## Model Overview + +This model is a continuously trained fine-tune of Qwen3-0.6B, trained with the **Gensyn RL-Swarm** framework using **GRPO (Group Relative Policy Optimization)** for enhanced reasoning and mathematical capabilities, with **GGUF (llama.cpp)** builds provided for local inference. **Note: Current training focuses on math & reasoning tasks**. 
+ +- **Agent ID:** `tall_tame_panther` +- **Training Status:** 🟢 LIVE - Model updates automatically every 5-10 minutes +- **Auto-Sync GGUF Pipeline Status:** 🟢 LIVE - Commits update automatically every hour +- **Current Progress:** Round 43,610+ / 100,000 (43.61%) +- **Framework Version:** Gensyn RL-Swarm v0.6.4 +- **Contract:** SwarmCoordinator v0.4.2 + +## Key Features + +- **Real-time Training**: Continuous learning with distributed RL across Gensyn swarm network +- **Multi-domain Reasoning**: Trained on logic, mathematical problem-solving & reasoning tasks +- **GGUF Support**: Multiple quantized formats available (F16, Q3_K_M, Q4_K_M, Q5_K_M) +- **llama.cpp Compatible**: Ready for edge deployment and local inference +- **BF16 Precision**: Trained with bfloat16 for optimal performance +- **TGI Compatible**: Supports Text Generation Inference for production deployment +- **Chat Format Support**: Inherits Qwen3 chat template for conversational use + +## Training Data + +The model is trained on a composite dataset (1,000 samples) with a weighted sampling strategy: + +| Dataset | Weight | Focus Area | +|---------|--------|------------| +| Propositional Logic | 7 | Logical reasoning, truth tables, Boolean operations | +| Calendar Arithmetic | 6 | Date calculations, leap years, recurring events | +| Decimal Arithmetic | 5 | Multi-term decimal operations with precision | +| Base Conversion | 4 | Number system conversions (base 2-16) | +| Fraction Simplification | 4 | GCD/LCM, fraction reduction | +| Basic Arithmetic | 2 | Foundation operations with parentheses | + +**Total Dataset Size:** 1,000 composite samples +**Training Samples per Round:** 2 +**Evaluation:** Real-time via swarm coordination + +## Quick Start + +### Standard Transformers + +``` +from transformers import AutoModelForCausalLM, AutoTokenizer + +model = AutoModelForCausalLM.from_pretrained( + "0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther", + torch_dtype="auto", + device_map="auto" +) +tokenizer 
= AutoTokenizer.from_pretrained("0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther") + +prompt = "What is 3/4 simplified to lowest terms?" +inputs = tokenizer(prompt, return_tensors="pt").to(model.device) +outputs = model.generate(**inputs, max_length=256, temperature=0.6, top_p=0.95) +print(tokenizer.decode(outputs[0], skip_special_tokens=True)) +``` + +### Chat Format (Conversational) + +``` +from transformers import AutoModelForCausalLM, AutoTokenizer + +model = AutoModelForCausalLM.from_pretrained("0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther") +tokenizer = AutoTokenizer.from_pretrained("0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther") + +messages = [ + {"role": "system", "content": "You are a helpful math tutor."}, + {"role": "user", "content": "Explain how to simplify 24/36 step by step."} +] + +text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True) +inputs = tokenizer(text, return_tensors="pt") +outputs = model.generate(**inputs, max_length=512) +print(tokenizer.decode(outputs[0])) +``` + +### Text Generation Inference (TGI) + +``` +docker run -d --gpus all \ + -p 8080:80 \ + -v $PWD/data:/data \ + ghcr.io/huggingface/text-generation-inference:latest \ + --model-id 0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther \ + --max-input-length 4096 \ + --max-total-tokens 8192 +``` + +### GGUF with llama.cpp + +``` +# Download quantized model (recommended: Q4_K_M) +wget https://huggingface.co/0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther/resolve/main/Qwen3-0.6B-Gensyn-Swarm-Q4_K_M.gguf + +# Run inference +./llama-cli -m Qwen3-0.6B-Gensyn-Swarm-Q4_K_M.gguf \ + -p "Solve: (5 + 3) * 2 = ?" \ + --temp 0.6 --top-p 0.95 +``` + +### Ollama + +``` +# Create Modelfile +cat > Modelfile << 'EOF' +FROM ./Qwen3-0.6B-Gensyn-Swarm-Q4_K_M.gguf +PARAMETER temperature 0.6 +PARAMETER top_p 0.95 +PARAMETER top_k 20 +SYSTEM "You are a helpful assistant specialized in mathematical reasoning and logic." 
+EOF + +# Create and run +ollama create qwen3-swarm -f Modelfile +ollama run qwen3-swarm "What is 15 multiplied by 23?" +``` + +## Available Formats + +| Format | Size | Precision | Use Case | Download | +|--------|------|-----------|----------|----------| +| Safetensors (BF16) | 1.19 GB | BF16 | Full precision training/fine-tuning | `model.safetensors` | +| GGUF F16 | 1.20 GB | FP16 | High quality inference | `Qwen3-0.6B-Gensyn-Swarm-F16.gguf` | +| GGUF Q5_K_M | 444 MB | 5-bit | Balanced quality/size | `Qwen3-0.6B-Gensyn-Swarm-Q5_K_M.gguf` | +| GGUF Q4_K_M | 397 MB | 4-bit | **Recommended** for production | `Qwen3-0.6B-Gensyn-Swarm-Q4_K_M.gguf` | +| GGUF Q3_K_M | 347 MB | 3-bit | Smallest, fastest | `Qwen3-0.6B-Gensyn-Swarm-Q3_K_M.gguf` | + +All GGUF formats are **llama.cpp compatible** and auto-updated hourly. + +### GGUF Quantization Strategy + +The Q5_K_M format uses mixed precision for optimal quality: + +- **Token Embeddings**: Q6_K (high quality vocab representation) +- **Attention Weights**: Q5_K (balanced quality/size) +- **Feed-Forward**: Q5_K/Q6_K (mixed for optimal performance) +- **Layer Norms**: F32 (full precision for stability) + +This strategy ensures minimal quality loss while maintaining small file size. + +## Chat Format & Conversational Use + +This model inherits **Qwen3's chat template** for structured conversations. 
+ +### Format Structure + +``` +<|im_start|>system +{system_message} +<|im_end|> +<|im_start|>user +{user_message} +<|im_end|> +<|im_start|>assistant +{assistant_response} +<|im_end|> +``` + +### Chat Template Features + +- **System Instructions**: Guide model behavior with system messages +- **Multi-turn Dialogue**: Maintains conversation context +- **Tool Calling**: Supports function calling (if enabled in training) +- **Reasoning Mode**: `<think>` tags for chain-of-thought (experimental) + +**Note**: While the model supports chat format structurally, optimal conversational performance depends on whether training data included formatted dialogues. Current training focuses on **math/reasoning tasks**. + +## Training Configuration + +### Gensyn RL-Swarm Architecture + +``` +Training Framework: + Method: GRPO (Group Relative Policy Optimization) + Base Model: Qwen/Qwen3-0.6B + Training Regime: bfloat16 mixed precision + Max Rounds: 100,000 + Update Frequency: Every 5-10 minutes + Generations per Round: 2 + Seed: 42 + +Blockchain Integration: + Network: Gensyn Testnet + Chain ID: 685685 + Contract: SwarmCoordinator v0.4.2 + +Swarm Communication: + Framework: Hivemind P2P Backend + Initial Peers: 3 bootnodes + Beam Size: 30 + +Reward System: + Manager: DefaultRewardManager + Reward Function: RGRewards (Reasoning Gym) + Judge API: https://swarm-judge.internal-apps-central1.clusters.gensyn.ai +``` + +### Model Hyperparameters + +``` +Architecture: + Hidden Size: 1024 + Intermediate Size: 3072 + Layers: 28 + Attention Heads: 16 + KV Heads: 8 + Head Dimension: 128 + Context Length: 40,960 tokens + Vocabulary: 151,936 tokens + +GRPO Config: + Epsilon: 0.2 + Epsilon High: 0.28 + Gradient Checkpointing: Enabled + +Generation: + Temperature: 0.6 + Top-K: 20 + Top-P: 0.95 +``` + +## Model Capabilities + +This model excels at: + +1. **Logical Reasoning**: Propositional logic, truth evaluation, Boolean algebra +2. 
**Mathematical Operations**: Multi-precision arithmetic, decimal calculations, fractions +3. **Number Systems**: Base conversion (binary, octal, decimal, hexadecimal) +4. **Date/Time Calculations**: Calendar arithmetic, leap years, day-of-week +5. **Step-by-step Problem Solving**: Chain-of-thought reasoning +6. **Conversational Tutoring**: Interactive problem-solving (via chat format) + +## Limitations + +- **Specialized Domain**: Optimized for reasoning/math; may underperform on creative writing +- **Training in Progress**: Weights update every 5-10 minutes; performance varies +- **Scale**: 0.6B parameters - suitable for edge but not SOTA for complex reasoning +- **Experimental**: Decentralized RL training; behavior less predictable than supervised models +- **Context**: Best performance within 4K tokens (full 40K supported) + +## Update Schedule + +| Format | Frequency | Trigger | +|--------|-----------|---------| +| Safetensors (BF16) | Every 5-10 min | Automatic via RL-Swarm | +| GGUF (all formats) | Every 1 hour | Auto-conversion pipeline | + +**Auto-Conversion Pipeline:** +1. Monitors repo for new training commits +2. Downloads latest `model.safetensors` +3. Converts to F16 GGUF base +4. Quantizes to Q3_K_M, Q4_K_M, Q5_K_M +5. Uploads all formats + +Check commit history for exact timestamps. + +## Gensyn RL-Swarm Technical Details + +### Architecture Components + +1. **Game Manager**: Orchestrates training rounds and swarm coordination +2. **Trainer**: GRPO implementation for policy optimization +3. **Data Manager**: Dataset loading and weighted sampling +4. **Reward Manager**: Computes rewards via judge API +5. **Coordinator**: Blockchain integration for swarm state +6. **P2P Backend**: Hivemind DHT for model sharing + +### Training Process + +``` +1. Agent joins swarm via P2P network +2. Coordinator assigns round via smart contract +3. Agent samples data from weighted datasets +4. Model generates 2 responses +5. Judge API evaluates and assigns rewards +6. 
GRPO updates policy based on rewards +7. Updated model shared via DHT +8. Best checkpoint saved to HuggingFace +9. Repeat +``` + +### Decentralization Benefits + +- **Fault Tolerance**: Multiple agents; no single point of failure +- **Diverse Exploration**: Different agents explore different strategies +- **Collective Intelligence**: Agents learn from each other +- **Transparent**: All rounds verified on-chain + +**Swarm Agent:** `tall_tame_panther` +**Contract:** SwarmCoordinator v0.4.2 + +## Technical Specifications + +### Software Stack + +- **Framework**: Gensyn RL-Swarm v0.6.4 +- **Library**: transformers v4.51+ +- **P2P**: hivemind +- **Blockchain**: Gensyn testnet +- **Config**: Hydra + OmegaConf +- **Logging**: WandB integration + +### Hardware Requirements + +**Training (GPU):** +- GPU: NVIDIA 4090 24GB+ (BF16 training) +- RAM: 16GB+ +- Cores: 10+ +- Storage: 50GB SSD +- Network: High bandwidth for P2P + +**Training (CPU-optimized):** +- CPU: Intel or AMD +- Cores: 10+ +- RAM: 16GB+ +- Storage: 50GB SSD +- Network: High bandwidth for P2P + +**Inference:** +- Safetensors: 8GB VRAM (GPU) / 16GB RAM (CPU) +- GGUF Q4_K_M: 2GB VRAM (GPU) / 4GB RAM (CPU) +- GGUF Q3_K_M: 3GB RAM (CPU-only) + +## Evaluation + +### Training Progress Metrics + +| Metric | Value | Target | +|--------|-------|--------| +| Completed Rounds | 43,610+ | 100,000 | +| Training Progress | 43.61% | 100% | +| Update Frequency | 5-10 min | Continuous | + +**Note**: Formal evaluation benchmarks (GSM8K, MATH, etc.) will be added as training progresses. Current metrics track training rounds completed in the decentralized swarm. + +## Reproducibility + +To reproduce training: + +1. Clone Gensyn RL-Swarm repository +2. Install: `pip install -r requirements.txt` +3. Configure `rgym_exp/config/rg-swarm.yaml` +4. Configure `rgym_exp/src/datasets.yaml` +5. Set environment variables: +``` +export HUGGINGFACE_ACCESS_TOKEN=<your-hf-access-token> +export MODEL_NAME=Qwen/Qwen3-0.6B +export ORG_ID=<your-org-id> +export SWARM_CONTRACT=<swarm-contract-address> +``` +6. 
Run: `bash run_rl_swarm.sh` + +**Note**: Exact reproduction requires the same seed (42), dataset config, and swarm state. + +## Citation + +``` +@misc{qwen3-gensyn-swarm-2025, + author = {0xgrey}, + title = {Qwen3-0.6B-Gensyn-Swarm: Continuous RL Training on Distributed Swarm}, + year = {2025}, + publisher = {HuggingFace}, + howpublished = {\url{https://huggingface.co/0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther}}, + note = {Agent ID: tall\_tame\_panther} +} + +@misc{gensyn-rl-swarm-2025, + title = {Gensyn RL-Swarm: Decentralized Reinforcement Learning Framework}, + author = {Gensyn AI}, + year = {2025}, + url = {https://gensyn.ai} +} +``` + +## References + +- **Gensyn Documentation**: https://docs.gensyn.ai/ +- **Gensyn GitHub**: https://github.com/gensyn-ai +- **RL-Swarm Contracts**: https://github.com/gensyn-ai/rl-swarm-contracts +- **Qwen3 Model Card**: https://huggingface.co/Qwen/Qwen3-0.6B +- **arXiv:1910.09700**: ML Carbon Emissions methodology + +## License + +Apache 2.0 - See [LICENSE](LICENSE) + +## Contact + +- **Developer**: 0xgrey +- **Agent ID**: tall_tame_panther +- **Community**: [Gensyn Discord](https://discord.gg/gensyn) + +--- + +**⚠️ Important**: This is a continuously trained model. For reproducibility, check out a specific commit hash: + +``` +git clone https://huggingface.co/0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther +cd Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther +git checkout <commit-hash> +``` + +--- + +
+ +**🤖 Trained with ❤️ using Gensyn RL-Swarm** + +[![Gensyn](https://img.shields.io/badge/Powered%20by-Gensyn%20AI-orange?style=for-the-badge)](https://gensyn.ai) + +
\ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..2cff534 --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "architectures": [ + "Qwen3ForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151645, + "head_dim": 128, + "hidden_act": "silu", + "hidden_size": 1024, + "initializer_range": 0.02, + "intermediate_size": 3072, + "max_position_embeddings": 40960, + "max_window_layers": 28, + "model_type": "qwen3", + "num_attention_heads": 16, + "num_hidden_layers": 28, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-06, + "rope_scaling": null, + "rope_theta": 1000000, + "sliding_window": null, + "tie_word_embeddings": true, + "torch_dtype": "bfloat16", + "transformers_version": "4.51.3", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..e4f1d31 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,13 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "temperature": 0.6, + "top_k": 20, + "top_p": 0.95, + "transformers_version": "4.51.3" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..80ba974 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5dab513aba5eab584b0c2e029bd3df4a2b2ce34109af35babe9e178dd193602c +size 1192135096