commit bec7176c7a08c9f707d11c75c401c894bed5a2e1
Author: ModelHub XC <noreply@modelhub.org.cn>
Date:   Fri Apr 10 11:05:00 2026 +0800

    初始化项目，由ModelHub XC社区提供模型
    
    Model: 0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther
    Source: Original Platform

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..fb4e2d8
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,43 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+Qwen3-0.6B-Gensyn-Swarm-F16.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen3-0.6B-Gensyn-Swarm-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen3-0.6B-Gensyn-Swarm-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen3-0.6B-Gensyn-Swarm-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen3-0.6B-F16.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen3-0.6B-Q3_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen3-0.6B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+Qwen3-0.6B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
diff --git a/Qwen3-0.6B-F16.gguf b/Qwen3-0.6B-F16.gguf
new file mode 100644
index 0000000..18fae26
--- /dev/null
+++ b/Qwen3-0.6B-F16.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:781945b18abb88574af9274fe71ad35c034868eee5bcfd7ec87874b7ae0074f0
+size 1198182016
diff --git a/Qwen3-0.6B-Q3_K_M.gguf b/Qwen3-0.6B-Q3_K_M.gguf
new file mode 100644
index 0000000..f8a6b13
--- /dev/null
+++ b/Qwen3-0.6B-Q3_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f065c6970cefdefc61da4579e04607262bed1f4161017a018a09330b5b8b36a1
+size 347126400
diff --git a/Qwen3-0.6B-Q4_K_M.gguf b/Qwen3-0.6B-Q4_K_M.gguf
new file mode 100644
index 0000000..f911283
--- /dev/null
+++ b/Qwen3-0.6B-Q4_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:80208a4a1629d2f64870c156281b50948cbd30ae690f5f1026b103728872ce41
+size 396704384
diff --git a/Qwen3-0.6B-Q5_K_M.gguf b/Qwen3-0.6B-Q5_K_M.gguf
new file mode 100644
index 0000000..c22773f
--- /dev/null
+++ b/Qwen3-0.6B-Q5_K_M.gguf
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:078da2b850fc45f77a9dba7bfa117bc19e0b41bd9a7d6c4df157be0d29fbff3f
+size 444414592
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..01f9ee4
--- /dev/null
+++ b/README.md
@@ -0,0 +1,480 @@
+---
+library_name: transformers
+tags:
+- text-generation
+- qwen3
+- rl-swarm
+- genrl-swarm
+- grpo
+- gensyn
+- trl
+- reasoning
+- math
+- logic
+- continuous-training
+- reinforcement-learning
+- safetensors
+- gguf
+- conversational
+- text-generation-inference
+- I am tall_tame_panther
+pipeline_tag: text-generation
+license: apache-2.0
+language:
+- en
+base_model: Qwen/Qwen3-0.6B
+datasets:
+- propositional_logic
+- calendar_arithmetic
+- decimal_arithmetic
+- base_conversion
+- fraction_simplification
+- basic_arithmetic
+inference: true
+widget:
+- text: What is 15 * 23?
+  example_title: Basic Arithmetic
+- text: Convert decimal 255 to hexadecimal.
+  example_title: Base Conversion
+- text: Simplify the fraction 24/36.
+  example_title: Fraction Simplification
+model-index:
+- name: Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther
+  results:
+  - task:
+      type: text-generation
+      name: Mathematical Reasoning
+    dataset:
+      name: Composite Reasoning Dataset
+      type: custom
+    metrics:
+    - type: training_rounds
+      value: 43610
+      name: Completed Training Rounds
+    - type: total_rounds
+      value: 100000
+      name: Target Rounds
+    - type: progress
+      value: 43.61
+      name: Training Progress (%)
+---
+
+# Qwen3-0.6B-Gensyn-Swarm (Agent ID: tall_tame_panther)
+
+[![Model](https://img.shields.io/badge/🤗%20Hugging%20Face-Model-blue)](https://huggingface.co/0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther)
+[![GGUF](https://img.shields.io/badge/GGUF-Available-green)](https://huggingface.co/0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther/tree/main)
+[![Gensyn](https://img.shields.io/badge/Trained%20with-Gensyn%20RL--Swarm-orange)](https://gensyn.ai)
+[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
+
+## Model Overview
+
+This model is a continuously trained Qwen3-0.6B, fine-tuned using the **Gensyn RL-Swarm** framework with **GRPO (Group Relative Policy Optimization)** for enhanced reasoning and mathematical capabilities. It is also published in **GGUF (llama.cpp)** format. **Note: Current training focuses on math & reasoning tasks**.
+
+- **Agent ID:** `tall_tame_panther`
+- **Training Status:** 🟢 LIVE - Model updates automatically every 5-10 minutes
+- **Auto-Sync GGUF Pipeline Status:** 🟢 LIVE - Commits update automatically every hour
+- **Current Progress:** Round 43,610+ / 100,000 (43.61%)
+- **Framework Version:** Gensyn RL-Swarm v0.6.4
+- **Contract:** SwarmCoordinator v0.4.2
+
+## Key Features
+
+- **Real-time Training**: Continuous learning with distributed RL across Gensyn swarm network
+- **Multi-domain Reasoning**: Trained on logic, mathematical problem-solving & reasoning tasks
+- **GGUF Support**: Multiple quantized formats available (F16, Q3_K_M, Q4_K_M, Q5_K_M)
+- **llama.cpp Compatible**: Ready for edge deployment and local inference
+- **BF16 Precision**: Trained with bfloat16 for optimal performance
+- **TGI Compatible**: Supports Text Generation Inference for production deployment
+- **Chat Format Support**: Inherits Qwen3 chat template for conversational use
+
+## Training Data
+
+The model is trained on a composite dataset (1,000 samples) with weighted sampling strategy:
+
+| Dataset | Weight | Focus Area |
+|---------|--------|------------|
+| Propositional Logic | 7 | Logical reasoning, truth tables, Boolean operations |
+| Calendar Arithmetic | 6 | Date calculations, leap years, recurring events |
+| Decimal Arithmetic | 5 | Multi-term decimal operations with precision |
+| Base Conversion | 4 | Number system conversions (base 2-16) |
+| Fraction Simplification | 4 | GCD/LCM, fraction reduction |
+| Basic Arithmetic | 2 | Foundation operations with parentheses |
+
+**Total Dataset Size:** 1,000 composite samples  
+**Training Samples per Round:** 2  
+**Evaluation:** Real-time via swarm coordination
+
+## Quick Start
+
+### Standard Transformers
+
+```
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model = AutoModelForCausalLM.from_pretrained(
+    "0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther",
+    torch_dtype="auto",
+    device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained("0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther")
+
+prompt = "What is 3/4 simplified to lowest terms?"
+inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+outputs = model.generate(**inputs, max_length=256, temperature=0.6, top_p=0.95)
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+```
+
+### Chat Format (Conversational)
+
+```
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model = AutoModelForCausalLM.from_pretrained("0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther")
+tokenizer = AutoTokenizer.from_pretrained("0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther")
+
+messages = [
+    {"role": "system", "content": "You are a helpful math tutor."},
+    {"role": "user", "content": "Explain how to simplify 24/36 step by step."}
+]
+
+text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+inputs = tokenizer(text, return_tensors="pt")
+outputs = model.generate(**inputs, max_length=512)
+print(tokenizer.decode(outputs[0]))
+```
+
+### Text Generation Inference (TGI)
+
+```
+docker run -d --gpus all \
+  -p 8080:80 \
+  -v $PWD/data:/data \
+  ghcr.io/huggingface/text-generation-inference:latest \
+  --model-id 0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther \
+  --max-input-length 4096 \
+  --max-total-tokens 8192
+```
+
+### GGUF with llama.cpp
+
+```
+# Download quantized model (recommended: Q4_K_M)
+wget https://huggingface.co/0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther/resolve/main/Qwen3-0.6B-Gensyn-Swarm-Q4_K_M.gguf
+
+# Run inference
+./llama-cli -m Qwen3-0.6B-Gensyn-Swarm-Q4_K_M.gguf \
+  -p "Solve: (5 + 3) * 2 = ?" \
+  --temp 0.6 --top-p 0.95
+```
+
+### Ollama
+
+```
+# Create Modelfile
+cat > Modelfile << 'EOF'
+FROM ./Qwen3-0.6B-Gensyn-Swarm-Q4_K_M.gguf
+PARAMETER temperature 0.6
+PARAMETER top_p 0.95
+PARAMETER top_k 20
+SYSTEM "You are a helpful assistant specialized in mathematical reasoning and logic."
+EOF
+
+# Create and run
+ollama create qwen3-swarm -f Modelfile
+ollama run qwen3-swarm "What is 15 multiplied by 23?"
+```
+
+## Available Formats
+
+| Format | Size | Precision | Use Case | Download |
+|--------|------|-----------|----------|----------|
+| Safetensors (BF16) | 1.19 GB | BF16 | Full precision training/fine-tuning | `model.safetensors` |
+| GGUF F16 | 1.20 GB | FP16 | High quality inference | `Qwen3-0.6B-Gensyn-Swarm-F16.gguf` |
+| GGUF Q5_K_M | 444 MB | 5-bit | Balanced quality/size | `Qwen3-0.6B-Gensyn-Swarm-Q5_K_M.gguf` |
+| GGUF Q4_K_M | 397 MB | 4-bit | **Recommended** for production | `Qwen3-0.6B-Gensyn-Swarm-Q4_K_M.gguf` |
+| GGUF Q3_K_M | 347 MB | 3-bit | Smallest, fastest | `Qwen3-0.6B-Gensyn-Swarm-Q3_K_M.gguf` |
+
+All GGUF formats are **llama.cpp compatible** and auto-updated hourly.
+
+### GGUF Quantization Strategy
+
+The Q5_K_M format uses mixed precision for optimal quality:
+
+- **Token Embeddings**: Q6_K (high quality vocab representation)
+- **Attention Weights**: Q5_K (balanced quality/size)
+- **Feed-Forward**: Q5_K/Q6_K (mixed for optimal performance)
+- **Layer Norms**: F32 (full precision for stability)
+
+This strategy ensures minimal quality loss while maintaining small file size.
+
+## Chat Format & Conversational Use
+
+This model inherits **Qwen3's chat template** for structured conversations.
+
+### Format Structure
+
+```
+<|im_start|>system
+{system_message}
+<|im_end|>
+<|im_start|>user
+{user_message}
+<|im_end|>
+<|im_start|>assistant
+{assistant_response}
+<|im_end|>
+```
+
+### Chat Template Features
+
+- **System Instructions**: Guide model behavior with system messages
+- **Multi-turn Dialogue**: Maintains conversation context
+- **Tool Calling**: Supports function calling (if enabled in training)
+- **Reasoning Mode**: `<think>` tags for chain-of-thought (experimental)
+
+**Note**: While the model supports chat format structurally, optimal conversational performance depends on whether training data included formatted dialogues. Current training focuses on **math/reasoning tasks**.
+
+## Training Configuration
+
+### Gensyn RL-Swarm Architecture
+
+```
+Training Framework:
+  Method: GRPO (Group Relative Policy Optimization)
+  Base Model: Qwen/Qwen3-0.6B
+  Training Regime: bfloat16 mixed precision
+  Max Rounds: 100,000
+  Update Frequency: Every 5-10 minutes
+  Generations per Round: 2
+  Seed: 42
+
+Blockchain Integration:
+  Network: Gensyn Testnet
+  Chain ID: 685685
+  Contract: SwarmCoordinator v0.4.2
+
+Swarm Communication:
+  Framework: Hivemind P2P Backend
+  Initial Peers: 3 bootnodes
+  Beam Size: 30
+
+Reward System:
+  Manager: DefaultRewardManager
+  Reward Function: RGRewards (Reasoning Gym)
+  Judge API: https://swarm-judge.internal-apps-central1.clusters.gensyn.ai
+```
+
+### Model Hyperparameters
+
+```
+Architecture:
+  Hidden Size: 1024
+  Intermediate Size: 3072
+  Layers: 28
+  Attention Heads: 16
+  KV Heads: 8
+  Head Dimension: 128
+  Context Length: 40,960 tokens
+  Vocabulary: 151,936 tokens
+
+GRPO Config:
+  Epsilon: 0.2
+  Epsilon High: 0.28
+  Gradient Checkpointing: Enabled
+  
+Generation:
+  Temperature: 0.6
+  Top-K: 20
+  Top-P: 0.95
+```
+
+## Model Capabilities
+
+This model excels at:
+
+1. **Logical Reasoning**: Propositional logic, truth evaluation, Boolean algebra
+2. **Mathematical Operations**: Multi-precision arithmetic, decimal calculations, fractions
+3. **Number Systems**: Base conversion (binary, octal, decimal, hexadecimal)
+4. **Date/Time Calculations**: Calendar arithmetic, leap years, day-of-week
+5. **Step-by-step Problem Solving**: Chain-of-thought reasoning
+6. **Conversational Tutoring**: Interactive problem-solving (via chat format)
+
+## Limitations
+
+- **Specialized Domain**: Optimized for reasoning/math; may underperform on creative writing
+- **Training in Progress**: Weights update every 5-10 minutes; performance varies
+- **Scale**: 0.6B parameters - suitable for edge but not SOTA for complex reasoning
+- **Experimental**: Decentralized RL training; behavior less predictable than supervised models
+- **Context**: Best performance within 4K tokens (full 40K supported)
+
+## Update Schedule
+
+| Format | Frequency | Trigger |
+|--------|-----------|---------|
+| Safetensors (BF16) | Every 5-10 min | Automatic via RL-Swarm |
+| GGUF (all formats) | Every 1 hour | Auto-conversion pipeline |
+
+**Auto-Conversion Pipeline:**
+1. Monitors repo for new training commits
+2. Downloads latest `model.safetensors`
+3. Converts to F16 GGUF base
+4. Quantizes to Q3_K_M, Q4_K_M, Q5_K_M
+5. Uploads all formats
+
+Check commit history for exact timestamps.
+
+## Gensyn RL-Swarm Technical Details
+
+### Architecture Components
+
+1. **Game Manager**: Orchestrates training rounds and swarm coordination
+2. **Trainer**: GRPO implementation for policy optimization
+3. **Data Manager**: Dataset loading and weighted sampling
+4. **Reward Manager**: Computes rewards via judge API
+5. **Coordinator**: Blockchain integration for swarm state
+6. **P2P Backend**: Hivemind DHT for model sharing
+
+### Training Process
+
+```
+1. Agent joins swarm via P2P network
+2. Coordinator assigns round via smart contract
+3. Agent samples data from weighted datasets
+4. Model generates 2 responses
+5. Judge API evaluates and assigns rewards
+6. GRPO updates policy based on rewards
+7. Updated model shared via DHT
+8. Best checkpoint saved to HuggingFace
+9. Repeat
+```
+
+### Decentralization Benefits
+
+- **Fault Tolerance**: Multiple agents; no single point of failure
+- **Diverse Exploration**: Different agents explore different strategies
+- **Collective Intelligence**: Agents learn from each other
+- **Transparent**: All rounds verified on-chain
+
+**Swarm Agent:** `tall_tame_panther`  
+**Contract:** SwarmCoordinator v0.4.2
+
+## Technical Specifications
+
+### Software Stack
+
+- **Framework**: Gensyn RL-Swarm v0.6.4
+- **Library**: transformers v4.51+
+- **P2P**: hivemind
+- **Blockchain**: Gensyn testnet
+- **Config**: Hydra + OmegaConf
+- **Logging**: WandB integration
+
+### Hardware Requirements
+
+**Training GPU:**
+- GPU: NVIDIA 4090 24GB+ (BF16 training)
+- RAM: 16GB+
+- Cores: 10+
+- Storage: 50GB SSD
+- Network: High bandwidth for P2P
+
+**Training (CPU-Optimized):**
+- CPU: INTEL or AMD
+- Cores: 10+
+- RAM: 16GB+
+- Storage: 50GB SSD
+- Network: High bandwidth for P2P
+ 
+**Inference:**
+- Safetensors: 8GB VRAM (GPU) / 16GB RAM (CPU)
+- GGUF Q4_K_M: 2GB VRAM (GPU) / 4GB RAM (CPU)
+- GGUF Q3_K_M: 3GB RAM (CPU-only)
+
+## Evaluation
+
+### Training Progress Metrics
+
+| Metric | Value | Target |
+|--------|-------|--------|
+| Completed Rounds | 43,610+ | 100,000 |
+| Training Progress | 43.61% | 100% |
+| Update Frequency | 5-10 min | Continuous |
+
+**Note**: Formal evaluation benchmarks (GSM8K, MATH, etc.) will be added as training progresses. Current metrics track training rounds completed in the decentralized swarm.
+
+## Reproducibility
+
+To reproduce training:
+
+1. Clone Gensyn RL-Swarm repository
+2. Install: `pip install -r requirements.txt`
+3. Configure `rgym_exp/config/rg-swarm.yaml`
+4. Configure `rgym_exp/src/datasets.yaml`
+5. Set environment variables:
+```
+export HUGGINGFACE_ACCESS_TOKEN=<token>
+export MODEL_NAME=Qwen/Qwen3-0.6B
+export ORG_ID=<org-id>
+export SWARM_CONTRACT=<contract-address>
+```
+6. Run: `bash run_rl_swarm.sh`
+
+**Note**: Exact reproduction requires same seed (42), dataset config, and swarm state.
+
+## Citation
+
+```
+@misc{qwen3-gensyn-swarm-2025,
+  author = {0xgrey},
+  title = {Qwen3-0.6B-Gensyn-Swarm: Continuous RL Training on Distributed Swarm},
+  year = {2025},
+  publisher = {HuggingFace},
+  howpublished = {\url{https://huggingface.co/0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther}},
+  note = {Agent ID: tall\_tame\_panther}
+}
+
+@misc{gensyn-rl-swarm-2025,
+  title = {Gensyn RL-Swarm: Decentralized Reinforcement Learning Framework},
+  author = {Gensyn AI},
+  year = {2025},
+  url = {https://gensyn.ai}
+}
+```
+
+## References
+
+- **Gensyn Documentation**: https://docs.gensyn.ai/
+- **Gensyn GitHub**: https://github.com/gensyn-ai
+- **RL-Swarm Contracts**: https://github.com/gensyn-ai/rl-swarm-contracts
+- **Qwen3 Model Card**: https://huggingface.co/Qwen/Qwen3-0.6B
+- **arXiv:1910.09700**: ML Carbon Emissions methodology
+
+## License
+
+Apache 2.0 - See [LICENSE](LICENSE)
+
+## Contact
+
+- **Developer**: 0xgrey
+- **Agent ID**: tall_tame_panther
+- **Community**: [Gensyn Discord](https://discord.gg/gensyn)
+
+---
+
+**⚠️ Important**: This is a continuously trained model. For reproducibility, pin a specific commit hash:
+
+```
+git clone https://huggingface.co/0xgr3y/Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther
+cd Qwen3-0.6B-Gensyn-Swarm-tall_tame_panther
+git checkout <commit-hash>
+```
+
+---
+
+<div align="center">
+
+**🤖 Trained with ❤️ using Gensyn RL-Swarm**
+
+[![Gensyn](https://img.shields.io/badge/Powered%20by-Gensyn%20AI-orange?style=for-the-badge)](https://gensyn.ai)
+
+</div>
\ No newline at end of file
diff --git a/config.json b/config.json
new file mode 100644
index 0000000..2cff534
--- /dev/null
+++ b/config.json
@@ -0,0 +1,30 @@
+{
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "max_position_embeddings": 40960,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.3",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000..e4f1d31
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,13 @@
+{
+  "bos_token_id": 151643,
+  "do_sample": true,
+  "eos_token_id": [
+    151645,
+    151643
+  ],
+  "pad_token_id": 151643,
+  "temperature": 0.6,
+  "top_k": 20,
+  "top_p": 0.95,
+  "transformers_version": "4.51.3"
+}
diff --git a/model.safetensors b/model.safetensors
new file mode 100644
index 0000000..80ba974
--- /dev/null
+++ b/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5dab513aba5eab584b0c2e029bd3df4a2b2ce34109af35babe9e178dd193602c
+size 1192135096