commit 2e4a8d6a8318f7393848c4b353077dab400dc0a4 Author: ModelHub XC Date: Tue Apr 28 07:59:12 2026 +0800 初始化项目,由ModelHub XC社区提供模型 Model: abhid1234/qwen-0.5b-tool-agent-grpo Source: Original Platform diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..bc5f30d --- /dev/null +++ b/README.md @@ -0,0 +1,199 @@ +--- +library_name: transformers +tags: [] +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + +This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated. + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] \ No newline at end of file diff --git a/artifacts/eval_results.json b/artifacts/eval_results.json new file mode 100644 index 0000000..84071a5 --- /dev/null +++ b/artifacts/eval_results.json @@ -0,0 +1,412 @@ +{ + "step": 15, + "scenarios_path": "data/scenarios_val.jsonl", + "num_generations": 8, + "total_scenarios": 50, + "total_rollouts": 400, + "successes": 18, + "accuracy_pct": 4.5, + "avg_reward": -1.8850000000000002, + "per_scenario": [ + { + "scenario_index": 0, + "task": "Convert 98 kg to lbs.", + "mean_reward": 3.125, + "max_reward": 4.0, + "success_count": 7, + "total_attempts": 8 + }, + { + "scenario_index": 1, + "task": "What is the speed of light?", + "mean_reward": -2.5, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 2, + "task": "What is the distance from Earth to the Sun in km in miles?", + "mean_reward": 0.5625, + "max_reward": 2.5, + "success_count": 1, + "total_attempts": 8 + }, + { + "scenario_index": 3, + "task": "What is 441 plus 23?", + "mean_reward": 2.25, + "max_reward": 4.0, + "success_count": 1, + "total_attempts": 8 + }, + { + "scenario_index": 4, + "task": "Convert 62 kg to lbs.", + "mean_reward": -3.0, + "max_reward": -3.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 5, + "task": "Which is hotter right now, London or Mumbai?", + "mean_reward": -2.875, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 6, + "task": "What is 185 plus 89?", + "mean_reward": -3.0, + "max_reward": -3.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 7, + "task": "What's the weather like in Dubai?", + "mean_reward": -1.375, + "max_reward": 4.0, + "success_count": 2, + "total_attempts": 8 + }, + { + "scenario_index": 8, + "task": "What is the population of Germany divided by its area in km2?", + "mean_reward": -2.041666666666667, + "max_reward": 1.666666666666666, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 9, + "task": "What is the boiling point of water?", + "mean_reward": -3.0, + "max_reward": -3.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 10, + "task": "Which is hotter right now, London or Mumbai?", + "mean_reward": -2.0, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 11, + "task": "What is the population of India divided by its area in km2?", + "mean_reward": -2.125, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 12, + "task": "What is India's population density in people per square mile?", + "mean_reward": -1.25, + "max_reward": 1.333333333333333, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 13, + "task": "What is the tallest mountain?", + "mean_reward": -2.875, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 14, + "task": "What is the distance from Earth to the Sun in km in miles?", + "mean_reward": -1.875, + "max_reward": 1.5, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 15, + "task": "What is the population of Japan divided by its area in km2?", + "mean_reward": -2.875, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 16, + "task": "What is Germany's population density in people per square mile?", + "mean_reward": -3.0, + "max_reward": -3.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 17, + "task": "Convert 74 kg to lbs.", + "mean_reward": -3.0, + "max_reward": -3.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 18, + "task": "Which is hotter right now, Paris or Cairo?", + "mean_reward": -3.0, + "max_reward": -3.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 19, + "task": "What is India's population density in people per square mile?", + "mean_reward": -1.5, + "max_reward": 1.333333333333333, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 20, + "task": "Which country has a larger population, France or Brazil?", + "mean_reward": -3.0, + "max_reward": -3.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 21, + "task": "Convert 64 kg to lbs.", + "mean_reward": -3.0, + "max_reward": -3.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 22, + "task": "Which country has a larger population, Japan or India?", + "mean_reward": -1.5625, + "max_reward": 3.0, + "success_count": 2, + "total_attempts": 8 + }, + { + "scenario_index": 23, + "task": "What is the GDP of Japan?", + "mean_reward": -2.75, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 24, + "task": "What is the population of France divided by its area in km2?", + "mean_reward": -2.875, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 25, + "task": "What is France's population density in people per square mile?", + "mean_reward": -2.5, + "max_reward": 1.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 26, + "task": "Convert 26 kg to lbs.", + "mean_reward": -3.0, + "max_reward": -3.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 27, + "task": "What is 660 times 87?", + "mean_reward": 1.0, + "max_reward": 4.0, + "success_count": 1, + "total_attempts": 8 + }, + { + "scenario_index": 28, + "task": "What is the boiling point of water?", + "mean_reward": -2.5, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 29, + "task": "What is the population of Germany divided by its area in km2?", + "mean_reward": -1.125, + "max_reward": 2.333333333333333, + "success_count": 1, + "total_attempts": 8 + }, + { + "scenario_index": 30, + "task": "Convert 40 kg to lbs.", + "mean_reward": -3.0, + "max_reward": -3.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 31, + "task": "What is the speed of light?", + "mean_reward": -2.375, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 32, + "task": "How old was Guido van Rossum in 2024?", + "mean_reward": -2.875, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 33, + "task": "Which is hotter right now, Paris or Dubai?", + "mean_reward": -3.0, + "max_reward": -3.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 34, + "task": "Which is hotter right now, Tokyo or Dubai?", + "mean_reward": -2.875, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 35, + "task": "Which is hotter right now, London or Cairo?", + "mean_reward": -2.375, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 36, + "task": "What is the value of pi?", + "mean_reward": -2.125, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 37, + "task": "What is the population of Japan divided by its area in km2?", + "mean_reward": -1.6666666666666667, + "max_reward": 1.666666666666666, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 38, + "task": "What is the temperature in London in Fahrenheit?", + "mean_reward": -1.375, + "max_reward": 1.5, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 39, + "task": "What is 464 plus 30?", + "mean_reward": -1.75, + "max_reward": 2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 40, + "task": "Which country has a larger population, France or India?", + "mean_reward": -2.1875, + "max_reward": 2.5, + "success_count": 1, + "total_attempts": 8 + }, + { + "scenario_index": 41, + "task": "What is the distance from Earth to the Sun in km in miles?", + "mean_reward": -1.625, + "max_reward": 2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 42, + "task": "What is the tallest mountain?", + "mean_reward": -1.125, + "max_reward": 2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 43, + "task": "What is the temperature in London in Fahrenheit?", + "mean_reward": 0.6875, + "max_reward": 2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 44, + "task": "What is 496 minus 24?", + "mean_reward": 1.0, + "max_reward": 2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 45, + "task": "What's the weather like in Cairo?", + "mean_reward": -1.25, + "max_reward": 4.0, + "success_count": 2, + "total_attempts": 8 + }, + { + "scenario_index": 46, + "task": "What is the tallest mountain?", + "mean_reward": -2.5, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 47, + "task": "What is India's population density in people per square mile?", + "mean_reward": -1.7916666666666667, + "max_reward": 1.333333333333333, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 48, + "task": "What is the GDP of France?", + "mean_reward": -2.625, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + }, + { + "scenario_index": 49, + "task": "How old was Guido van Rossum in 2024?", + "mean_reward": -2.75, + "max_reward": -2.0, + "success_count": 0, + "total_attempts": 8 + } + ] +} \ No newline at end of file diff --git a/artifacts/reward_curve.txt b/artifacts/reward_curve.txt new file mode 100644 index 0000000..7a6e995 --- /dev/null +++ b/artifacts/reward_curve.txt @@ -0,0 +1,16 @@ + Avg reward: -0.208 | Avg tools/rollout: 0.9 | groups with variance: 4/4 + Avg reward: 1.969 | Avg tools/rollout: 1.0 | groups with variance: 1/4 + Avg reward: 0.854 | Avg tools/rollout: 1.0 | groups with variance: 4/4 + Avg reward: 1.193 | Avg tools/rollout: 0.9 | groups with variance: 3/4 + Avg reward: -2.094 | Avg tools/rollout: 0.8 | groups with variance: 3/4 + Avg reward: 0.505 | Avg tools/rollout: 0.9 | groups with variance: 4/4 + Avg reward: -0.141 | Avg tools/rollout: 0.8 | groups with variance: 4/4 + Avg reward: -0.797 | Avg tools/rollout: 0.9 | groups with variance: 4/4 + Avg reward: 0.307 | Avg tools/rollout: 0.9 | groups with variance: 3/4 + Avg reward: -1.125 | Avg tools/rollout: 1.0 | groups with variance: 1/4 + Avg reward: -1.359 | Avg tools/rollout: 0.9 | groups with variance: 4/4 + Avg reward: 0.484 | Avg tools/rollout: 1.0 | groups with variance: 3/4 + Avg reward: -0.073 | Avg tools/rollout: 0.9 | groups with variance: 4/4 + Avg reward: 1.740 | Avg tools/rollout: 1.0 | groups with variance: 3/4 + Avg reward: 0.635 | Avg tools/rollout: 1.0 | groups with variance: 3/4 + Avg reward: 1.615 | Avg tools/rollout: 0.9 | groups with variance: 2/4 diff --git a/artifacts/training.log b/artifacts/training.log new file mode 100644 index 0000000..9c114ca --- /dev/null +++ b/artifacts/training.log @@ -0,0 +1,1054 @@ +Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0 Please see https://github.com/pytorch/ao/issues/2919 for more info +Loaded 200 train, 50 val scenarios +GRPO config: 4 scenarios/step × 8 rollouts/scenario = 32 rollouts/step +Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0 Please see https://github.com/pytorch/ao/issues/2919 for more info +/usr/local/lib/python3.12/dist-packages/art/__init__.py:37: UserWarning: WARNING: Unsloth should be imported before [transformers] to ensure all optimizations are applied. Your code may run slower or encounter memory issues without these optimizations. + +Please restructure your imports with 'import unsloth' at the top of your file. + import unsloth # noqa: F401 +🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning. +🦥 Unsloth Zoo will now patch everything to make training faster! +==((====))== Unsloth 2026.3.3: Fast Qwen2 patching. Transformers: 5.2.0. vLLM: 0.17.0+art1. + \\ /| NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.252 GB. Platform: Linux. +O^O/ \_/ \ Torch: 2.10.0+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.6.0 +\ / Bfloat16 = TRUE. FA [Xformers = 0.0.35. FA2 = False] + "-____-" Free license: http://github.com/unslothai/unsloth +Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored! + Loading weights: 0%| | 0/290 [00:00. +Unsloth 2026.3.3 patched 24 layers with 24 QKV layers, 24 O layers and 24 MLP layers. +Warning: `huggingface-cli` is deprecated and no longer works. Use `hf` instead. + +Hint: `hf` is already installed! Use it directly. + +Hint: Examples: + hf auth login + hf download unsloth/gemma-4-31B-it-GGUF + hf upload my-cool-model . . + hf models ls --search "gemma" + hf repos ls --format json + hf jobs run python:3.12 python -c 'print("Hello!")' + hf --help + +INFO 04-13 02:20:55 [model.py:531] Resolved architecture: Qwen2ForCausalLM +INFO 04-13 02:20:55 [model.py:1554] Using max model len 32768 +INFO 04-13 02:20:55 [scheduler.py:231] Chunked prefill is enabled with max_num_batched_tokens=2048. +INFO 04-13 02:20:55 [vllm.py:747] Asynchronous scheduling is enabled. +WARNING 04-13 02:20:57 [system_utils.py:152] We must use the `spawn` multiprocessing start method. Overriding VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. See https://docs.vllm.ai/en/latest/usage/troubleshooting.html#python-multiprocessing for more information. Reasons: CUDA is initialized +Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0 Please see https://github.com/pytorch/ao/issues/2919 for more info +/usr/local/lib/python3.12/dist-packages/art/__init__.py:37: UserWarning: WARNING: Unsloth should be imported before [transformers] to ensure all optimizations are applied. Your code may run slower or encounter memory issues without these optimizations. + +Please restructure your imports with 'import unsloth' at the top of your file. + import unsloth # noqa: F401 +🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning. +🦥 Unsloth Zoo will now patch everything to make training faster! +(EngineCore_DP0 pid=13597) INFO 04-13 02:21:21 [core.py:101] Initializing a V1 LLM engine (v0.17.0+art1) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, enable_return_routed_experts=False, kv_cache_dtype=auto, device_config=cuda, structured_outputs_config=StructuredOutputsConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_parser='', reasoning_parser_plugin='', enable_in_reasoning=False), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, kv_cache_metrics=False, kv_cache_metrics_sample=0.01, cudagraph_metrics=False, enable_layerwise_nvtx_tracing=False, enable_mfu_metrics=False, enable_mm_processor_stats=False, enable_logging_iteration_details=False), seed=0, served_model_name=Qwen/Qwen2.5-0.5B-Instruct, enable_prefix_caching=True, enable_chunked_prefill=True, pooler_config=None, compilation_config={'level': None, 'mode': , 'debug_dump_path': None, 'cache_dir': '', 'compile_cache_save_format': 'binary', 'backend': 'inductor', 'custom_ops': ['none'], 'splitting_ops': ['vllm::unified_attention', 'vllm::unified_attention_with_output', 'vllm::unified_mla_attention', 'vllm::unified_mla_attention_with_output', 'vllm::mamba_mixer2', 'vllm::mamba_mixer', 'vllm::short_conv', 'vllm::linear_attention', 'vllm::plamo2_mamba_mixer', 'vllm::gdn_attention_core', 'vllm::kda_attention', 'vllm::sparse_attn_indexer', 'vllm::rocm_aiter_sparse_attn_indexer', 'vllm::unified_kv_cache_update', 'vllm::unified_mla_kv_cache_update'], 'compile_mm_encoder': False, 'compile_sizes': [], 'compile_ranges_split_points': [2048], 'inductor_compile_config': {'enable_auto_functionalized_v2': False, 'combo_kernels': True, 'benchmark_combo_kernel': True}, 'inductor_passes': {}, 'cudagraph_mode': , 'cudagraph_num_of_warmups': 1, 'cudagraph_capture_sizes': [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256], 'cudagraph_copy_inputs': False, 'cudagraph_specialize_lora': True, 'use_inductor_graph_partition': False, 'pass_config': {'fuse_norm_quant': False, 'fuse_act_quant': False, 'fuse_attn_quant': False, 'enable_sp': False, 'fuse_gemm_comms': False, 'fuse_allreduce_rms': False}, 'max_cudagraph_capture_size': 256, 'dynamic_shapes_config': {'type': , 'evaluate_guards': False, 'assume_32_bit_indexing': False}, 'local_cache_dir': None, 'fast_moe_cold_start': True, 'static_all_moe_layers': []} +(EngineCore_DP0 pid=13597) INFO 04-13 02:21:21 [worker_base.py:283] Injected into for extended collective_rpc calls ['run', 'time'] +(EngineCore_DP0 pid=13597) INFO 04-13 02:21:21 [parallel_state.py:1393] world_size=1 rank=0 local_rank=0 distributed_init_method=tcp://172.21.0.2:53693 backend=nccl +(EngineCore_DP0 pid=13597) INFO 04-13 02:21:21 [parallel_state.py:1715] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, PCP rank 0, TP rank 0, EP rank N/A, EPLB rank N/A +(EngineCore_DP0 pid=13597) INFO 04-13 02:21:22 [base.py:106] Offloader set to NoopOffloader +(EngineCore_DP0 pid=13597) INFO 04-13 02:21:22 [gpu_model_runner.py:4255] Starting to load model Qwen/Qwen2.5-0.5B-Instruct... +(EngineCore_DP0 pid=13597) INFO 04-13 02:21:23 [cuda.py:405] Using FLASH_ATTN attention backend out of potential backends: ['FLASH_ATTN', 'FLASHINFER', 'TRITON_ATTN', 'FLEX_ATTENTION']. +(EngineCore_DP0 pid=13597) INFO 04-13 02:21:23 [flash_attn.py:587] Using FlashAttention version 2 +(EngineCore_DP0 pid=13597) :1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead. +(EngineCore_DP0 pid=13597) :1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead. +(EngineCore_DP0 pid=13597) INFO 04-13 02:21:24 [weight_utils.py:601] No model.safetensors.index.json found in remote. +(EngineCore_DP0 pid=13597) Loading safetensors checkpoint shards: 0% Completed | 0/1 [00:00:1: SyntaxWarning: 'float' object is not callable; perhaps you missed a comma? +[rollout] caught BadRequestError: Error code: 400 - {'error': {'message': 'Already borrowed', 'type': 'BadRequestError', 'param': None, 'code': 400}} + validation: 63%|██████▎ | 253/400 [00:50<01:18, 1.88it/s, reward=-1.77, num_turns=1.54, num_tools=0.542, failed=0.557, completion_tokens=34.4] validation: 64%|██████▎ | 254/400 [00:50<01:17, 1.88it/s, reward=-1.77, num_turns=1.54, num_tools=0.543, failed=0.555, completion_tokens=34.5] validation: 64%|██████▍ | 255/400 [00:50<01:17, 1.88it/s, reward=-1.77, num_turns=1.55, num_tools=0.545, failed=0.553, completion_tokens=34.7] validation: 64%|██████▍ | 256/400 [00:50<01:16, 1.88it/s, reward=-1.78, num_turns=1.55, num_tools=0.547, failed=0.555, completion_tokens=34.6] validation: 64%|██████▍ | 257/400 [00:50<01:16, 1.88it/s, reward=-1.78, num_turns=1.55, num_tools=0.549, failed=0.553, completion_tokens=34.7] validation: 64%|██████▍ | 258/400 [00:50<01:15, 1.88it/s, reward=-1.78, num_turns=1.55, num_tools=0.55, failed=0.55, completion_tokens=34.8] validation: 65%|██████▍ | 259/400 [00:50<01:15, 1.88it/s, reward=-1.78, num_turns=1.55, num_tools=0.552, failed=0.548, completion_tokens=35] validation: 65%|██████▌ | 260/400 [00:50<01:14, 1.88it/s, reward=-1.78, num_turns=1.55, num_tools=0.554, failed=0.546, completion_tokens=35.1] validation: 65%|██████▌ | 261/400 [00:50<01:14, 1.88it/s, reward=-1.78, num_turns=1.56, num_tools=0.556, failed=0.544, completion_tokens=35.1] validation: 66%|██████▌ | 262/400 [00:50<01:13, 1.88it/s, reward=-1.78, num_turns=1.56, num_tools=0.557, failed=0.542, completion_tokens=35.1] validation: 66%|██████▌ | 263/400 [00:50<01:13, 1.88it/s, reward=-1.77, num_turns=1.56, num_tools=0.559, failed=0.54, completion_tokens=35.3] validation: 66%|██████▌ | 264/400 [00:50<01:12, 1.88it/s, reward=-1.76, num_turns=1.56, num_tools=0.561, failed=0.538, completion_tokens=35.3] validation: 66%|██████▋ | 265/400 [00:50<01:11, 1.88it/s, reward=-1.75, num_turns=1.56, num_tools=0.562, failed=0.536, completion_tokens=35.3] validation: 66%|██████▋ | 266/400 [00:50<00:51, 2.62it/s, reward=-1.75, num_turns=1.56, num_tools=0.562, failed=0.536, completion_tokens=35.3] validation: 66%|██████▋ | 266/400 [00:50<00:51, 2.62it/s, reward=-1.73, num_turns=1.56, num_tools=0.564, failed=0.534, completion_tokens=35.3] validation: 67%|██████▋ | 267/400 [00:50<00:50, 2.62it/s, reward=-1.72, num_turns=1.57, num_tools=0.566, failed=0.532, completion_tokens=35.3] validation: 67%|██████▋ | 268/400 [00:50<00:50, 2.62it/s, reward=-1.71, num_turns=1.57, num_tools=0.567, failed=0.53, completion_tokens=35.4] validation: 67%|██████▋ | 269/400 [00:50<00:49, 2.62it/s, reward=-1.7, num_turns=1.57, num_tools=0.569, failed=0.528, completion_tokens=35.7] validation: 68%|██████▊ | 270/400 [00:50<00:49, 2.62it/s, reward=-1.69, num_turns=1.57, num_tools=0.57, failed=0.526, completion_tokens=36.2] validation: 68%|██████▊ | 271/400 [00:50<00:49, 2.62it/s, reward=-1.68, num_turns=1.57, num_tools=0.572, failed=0.524, completion_tokens=36.3] validation: 68%|██████▊ | 272/400 [00:50<00:48, 2.62it/s, reward=-1.68, num_turns=1.57, num_tools=0.574, failed=0.522, completion_tokens=36.3] validation: 68%|██████▊ | 273/400 [00:50<00:48, 2.62it/s, reward=-1.66, num_turns=1.58, num_tools=0.575, failed=0.52, completion_tokens=36.4] validation: 68%|██████▊ | 274/400 [00:50<00:48, 2.62it/s, reward=-1.65, num_turns=1.58, num_tools=0.577, failed=0.518, completion_tokens=36.7] validation: 69%|██████▉ | 275/400 [00:50<00:47, 2.62it/s, reward=-1.63, num_turns=1.58, num_tools=0.582, failed=0.516, completion_tokens=36.8] validation: 69%|██████▉ | 276/400 [00:50<00:47, 2.62it/s, reward=-1.62, num_turns=1.58, num_tools=0.583, failed=0.514, completion_tokens=36.8] validation: 69%|██████▉ | 277/400 [00:50<00:46, 2.62it/s, reward=-1.61, num_turns=1.58, num_tools=0.585, failed=0.513, completion_tokens=36.9] validation: 70%|██████▉ | 278/400 [00:50<00:46, 2.62it/s, reward=-1.61, num_turns=1.59, num_tools=0.586, failed=0.511, completion_tokens=37.1] validation: 70%|██████▉ | 279/400 [00:50<00:46, 2.62it/s, reward=-1.61, num_turns=1.59, num_tools=0.588, failed=0.509, completion_tokens=37.4] validation: 70%|███████ | 280/400 [00:50<00:45, 2.62it/s, reward=-1.61, num_turns=1.59, num_tools=0.589, failed=0.507, completion_tokens=37.7] validation: 70%|███████ | 281/400 [00:51<00:32, 3.71it/s, reward=-1.61, num_turns=1.59, num_tools=0.589, failed=0.507, completion_tokens=37.7] validation: 70%|███████ | 281/400 [00:51<00:32, 3.71it/s, reward=-1.62, num_turns=1.59, num_tools=0.591, failed=0.505, completion_tokens=37.7] validation: 70%|███████ | 282/400 [00:51<00:31, 3.71it/s, reward=-1.62, num_turns=1.59, num_tools=0.592, failed=0.504, completion_tokens=37.7] validation: 71%|███████ | 283/400 [00:51<00:31, 3.71it/s, reward=-1.62, num_turns=1.59, num_tools=0.594, failed=0.502, completion_tokens=37.7] validation: 71%|███████ | 284/400 [00:51<00:31, 3.71it/s, reward=-1.62, num_turns=1.6, num_tools=0.595, failed=0.5, completion_tokens=37.8] validation: 71%|███████▏ | 285/400 [00:51<00:31, 3.71it/s, reward=-1.61, num_turns=1.6, num_tools=0.596, failed=0.498, completion_tokens=37.8] validation: 72%|███████▏ | 286/400 [00:51<00:30, 3.71it/s, reward=-1.59, num_turns=1.6, num_tools=0.598, failed=0.497, completion_tokens=37.7] validation: 72%|███████▏ | 287/400 [00:51<00:30, 3.71it/s, reward=-1.57, num_turns=1.6, num_tools=0.599, failed=0.495, completion_tokens=37.7] validation: 72%|███████▏ | 288/400 [00:51<00:30, 3.71it/s, reward=-1.55, num_turns=1.6, num_tools=0.601, failed=0.493, completion_tokens=37.6] validation: 72%|███████▏ | 289/400 [00:51<00:29, 3.71it/s, reward=-1.54, num_turns=1.6, num_tools=0.602, failed=0.491, completion_tokens=37.6] validation: 72%|███████▎ | 290/400 [00:51<00:29, 3.71it/s, reward=-1.53, num_turns=1.6, num_tools=0.603, failed=0.49, completion_tokens=37.8] validation: 73%|███████▎ | 291/400 [00:51<00:22, 4.74it/s, reward=-1.53, num_turns=1.6, num_tools=0.603, failed=0.49, completion_tokens=37.8] validation: 73%|███████▎ | 291/400 [00:51<00:22, 4.74it/s, reward=-1.53, num_turns=1.6, num_tools=0.605, failed=0.488, completion_tokens=37.8] validation: 73%|███████▎ | 292/400 [00:51<00:22, 4.74it/s, reward=-1.52, num_turns=1.61, num_tools=0.606, failed=0.486, completion_tokens=38] validation: 73%|███████▎ | 293/400 [00:51<00:22, 4.74it/s, reward=-1.52, num_turns=1.61, num_tools=0.608, failed=0.485, completion_tokens=38] validation: 74%|███████▎ | 294/400 [00:51<00:22, 4.74it/s, reward=-1.51, num_turns=1.61, num_tools=0.609, failed=0.483, completion_tokens=37.9] validation: 74%|███████▍ | 295/400 [00:51<00:22, 4.74it/s, reward=-1.5, num_turns=1.61, num_tools=0.61, failed=0.481, completion_tokens=37.8] validation: 74%|███████▍ | 296/400 [00:51<00:21, 4.74it/s, reward=-1.49, num_turns=1.61, num_tools=0.611, failed=0.48, completion_tokens=37.8] validation: 74%|███████▍ | 297/400 [00:51<00:21, 4.74it/s, reward=-1.49, num_turns=1.61, num_tools=0.613, failed=0.478, completion_tokens=37.7] validation: 74%|███████▍ | 298/400 [00:51<00:21, 4.74it/s, reward=-1.49, num_turns=1.61, num_tools=0.614, failed=0.477, completion_tokens=37.7] validation: 75%|███████▍ | 299/400 [00:51<00:21, 4.74it/s, reward=-1.49, num_turns=1.62, num_tools=0.615, failed=0.475, completion_tokens=37.7] validation: 75%|███████▌ | 300/400 [00:51<00:21, 4.74it/s, reward=-1.48, num_turns=1.62, num_tools=0.617, failed=0.473, completion_tokens=37.7] validation: 75%|███████▌ | 301/400 [00:51<00:20, 4.74it/s, reward=-1.48, num_turns=1.62, num_tools=0.618, failed=0.472, completion_tokens=38] validation: 76%|███████▌ | 302/400 [00:51<00:20, 4.74it/s, reward=-1.47, num_turns=1.62, num_tools=0.619, failed=0.47, completion_tokens=38] validation: 76%|███████▌ | 303/400 [00:51<00:20, 4.74it/s, reward=-1.45, num_turns=1.62, num_tools=0.62, failed=0.469, completion_tokens=37.9] validation: 76%|███████▌ | 304/400 [00:51<00:20, 4.74it/s, reward=-1.43, num_turns=1.62, num_tools=0.622, failed=0.467, completion_tokens=37.9] validation: 76%|███████▋ | 305/400 [00:51<00:20, 4.74it/s, reward=-1.41, num_turns=1.62, num_tools=0.623, failed=0.466, completion_tokens=37.8] validation: 76%|███████▋ | 306/400 [00:51<00:19, 4.74it/s, reward=-1.4, num_turns=1.62, num_tools=0.624, failed=0.464, completion_tokens=37.8] validation: 77%|███████▋ | 307/400 [00:51<00:19, 4.74it/s, reward=-1.4, num_turns=1.63, num_tools=0.625, failed=0.463, completion_tokens=37.8] validation: 77%|███████▋ | 308/400 [00:51<00:19, 4.74it/s, reward=-1.4, num_turns=1.63, num_tools=0.627, failed=0.461, completion_tokens=37.8] validation: 77%|███████▋ | 309/400 [00:51<00:19, 4.74it/s, reward=-1.4, num_turns=1.63, num_tools=0.628, failed=0.46, completion_tokens=37.8] validation: 78%|███████▊ | 310/400 [00:51<00:18, 4.74it/s, reward=-1.41, num_turns=1.63, num_tools=0.626, failed=0.458, completion_tokens=38.2] validation: 78%|███████▊ | 311/400 [00:51<00:18, 4.74it/s, reward=-1.41, num_turns=1.63, num_tools=0.627, failed=0.457, completion_tokens=38.3] validation: 78%|███████▊ | 312/400 [00:51<00:18, 4.74it/s, reward=-1.41, num_turns=1.63, num_tools=0.628, failed=0.455, completion_tokens=38.4] validation: 78%|███████▊ | 313/400 [00:51<00:18, 4.74it/s, reward=-1.41, num_turns=1.63, num_tools=0.629, failed=0.454, completion_tokens=38.8] validation: 78%|███████▊ | 314/400 [00:51<00:18, 4.74it/s, reward=-1.42, num_turns=1.63, num_tools=0.627, failed=0.452, completion_tokens=39.1] validation: 79%|███████▉ | 315/400 [00:51<00:17, 4.74it/s, reward=-1.42, num_turns=1.63, num_tools=0.629, failed=0.451, completion_tokens=39.1] validation: 79%|███████▉ | 316/400 [00:51<00:17, 4.74it/s, reward=-1.42, num_turns=1.63, num_tools=0.63, failed=0.449, completion_tokens=39.1] validation: 79%|███████▉ | 317/400 [00:51<00:17, 4.74it/s, reward=-1.42, num_turns=1.63, num_tools=0.631, failed=0.448, completion_tokens=39.1] validation: 80%|███████▉ | 318/400 [00:51<00:17, 4.74it/s, reward=-1.43, num_turns=1.63, num_tools=0.632, failed=0.447, completion_tokens=39.2] validation: 80%|███████▉ | 319/400 [00:51<00:17, 4.74it/s, reward=-1.43, num_turns=1.63, num_tools=0.633, failed=0.445, completion_tokens=39.2] validation: 80%|████████ | 320/400 [00:51<00:16, 4.74it/s, reward=-1.43, num_turns=1.63, num_tools=0.634, failed=0.444, completion_tokens=39.2] validation: 80%|████████ | 321/400 [00:51<00:16, 4.74it/s, reward=-1.42, num_turns=1.64, num_tools=0.636, failed=0.442, completion_tokens=39.1] validation: 80%|████████ | 322/400 [00:51<00:16, 4.74it/s, reward=-1.41, num_turns=1.64, num_tools=0.637, failed=0.441, completion_tokens=39.1] validation: 81%|████████ | 323/400 [00:51<00:16, 4.74it/s, reward=-1.4, num_turns=1.64, num_tools=0.638, failed=0.44, completion_tokens=39] validation: 81%|████████ | 324/400 [00:51<00:16, 4.74it/s, reward=-1.4, num_turns=1.64, num_tools=0.639, failed=0.438, completion_tokens=38.9] validation: 81%|████████▏ | 325/400 [00:51<00:15, 4.74it/s, reward=-1.4, num_turns=1.64, num_tools=0.64, failed=0.437, completion_tokens=38.9] validation: 82%|████████▏ | 326/400 [00:51<00:15, 4.74it/s, reward=-1.4, num_turns=1.64, num_tools=0.641, failed=0.436, completion_tokens=38.9] validation: 82%|████████▏ | 327/400 [00:51<00:15, 4.74it/s, reward=-1.41, num_turns=1.64, num_tools=0.642, failed=0.434, completion_tokens=38.9] validation: 82%|████████▏ | 328/400 [00:51<00:15, 4.74it/s, reward=-1.41, num_turns=1.64, num_tools=0.643, failed=0.433, completion_tokens=39] validation: 82%|████████▏ | 329/400 [00:51<00:14, 4.74it/s, reward=-1.41, num_turns=1.64, num_tools=0.644, failed=0.432, completion_tokens=39] validation: 82%|████████▎ | 330/400 [00:51<00:14, 4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.645, failed=0.43, completion_tokens=39] validation: 83%|████████▎ | 331/400 [00:51<00:14, 4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.647, failed=0.429, completion_tokens=39] validation: 83%|████████▎ | 332/400 [00:51<00:14, 4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.648, failed=0.428, completion_tokens=38.9] validation: 83%|████████▎ | 333/400 [00:51<00:14, 4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.649, failed=0.426, completion_tokens=38.9] validation: 84%|████████▎ | 334/400 [00:51<00:13, 4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.647, failed=0.425, completion_tokens=39] validation: 84%|████████▍ | 335/400 [00:51<00:13, 4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.648, failed=0.424, completion_tokens=39.4] validation: 84%|████████▍ | 336/400 [00:51<00:13, 4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.649, failed=0.423, completion_tokens=39.4] validation: 84%|████████▍ | 337/400 [00:51<00:13, 4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.65, failed=0.421, completion_tokens=39.4] validation: 84%|████████▍ | 338/400 [00:51<00:13, 4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.651, failed=0.42, completion_tokens=39.4] validation: 85%|████████▍ | 339/400 [00:51<00:12, 4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.652, failed=0.419, completion_tokens=39.4] validation: 85%|████████▌ | 340/400 [00:51<00:12, 4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.65, failed=0.418, completion_tokens=40.3] validation: 85%|████████▌ | 341/400 [00:51<00:12, 4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.651, failed=0.416, completion_tokens=40.4] validation: 86%|████████▌ | 342/400 [00:51<00:12, 4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.652, failed=0.415, completion_tokens=40.5] validation: 86%|████████▌ | 343/400 [00:51<00:12, 4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.653, failed=0.414, completion_tokens=40.6] validation: 86%|████████▌ | 344/400 [00:51<00:11, 4.74it/s, reward=-1.39, num_turns=1.65, num_tools=0.654, failed=0.413, completion_tokens=41] validation: 86%|████████▋ | 345/400 [00:51<00:11, 4.74it/s, reward=-1.39, num_turns=1.66, num_tools=0.655, failed=0.412, completion_tokens=41.1] validation: 86%|████████▋ | 346/400 [00:51<00:11, 4.74it/s, reward=-1.39, num_turns=1.66, num_tools=0.656, failed=0.41, completion_tokens=41.1] validation: 87%|████████▋ | 347/400 [00:51<00:11, 4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.654, failed=0.409, completion_tokens=41.8] validation: 87%|████████▋ | 348/400 [00:51<00:10, 4.74it/s, reward=-1.39, num_turns=1.66, num_tools=0.655, failed=0.408, completion_tokens=42.1] validation: 87%|████████▋ | 349/400 [00:51<00:10, 4.74it/s, reward=-1.39, num_turns=1.66, num_tools=0.656, failed=0.407, completion_tokens=43.2] validation: 88%|████████▊ | 350/400 [00:51<00:10, 4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.654, failed=0.406, completion_tokens=44.4] validation: 88%|████████▊ | 351/400 [00:51<00:10, 4.74it/s, reward=-1.4, num_turns=1.65, num_tools=0.652, failed=0.405, completion_tokens=45.4] validation: 88%|████████▊ | 352/400 [00:51<00:10, 4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.651, failed=0.403, completion_tokens=47.3] validation: 88%|████████▊ | 353/400 [00:51<00:09, 4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.652, failed=0.402, completion_tokens=47.8] validation: 88%|████████▊ | 354/400 [00:51<00:09, 4.74it/s, reward=-1.41, num_turns=1.65, num_tools=0.65, failed=0.401, completion_tokens=49.6] validation: 89%|████████▉ | 355/400 [00:51<00:09, 4.74it/s, reward=-1.42, num_turns=1.65, num_tools=0.648, failed=0.4, completion_tokens=51.4] validation: 89%|████████▉ | 356/400 [00:51<00:09, 4.74it/s, reward=-1.42, num_turns=1.65, num_tools=0.646, failed=0.399, completion_tokens=53.2] validation: 89%|████████▉ | 357/400 [00:51<00:09, 4.74it/s, reward=-1.43, num_turns=1.64, num_tools=0.644, failed=0.398, completion_tokens=54.4] validation: 90%|████████▉ | 358/400 [00:51<00:08, 4.74it/s, reward=-1.43, num_turns=1.65, num_tools=0.645, failed=0.397, completion_tokens=54.7] validation: 90%|████████▉ | 359/400 [00:51<00:08, 4.74it/s, reward=-1.42, num_turns=1.65, num_tools=0.646, failed=0.396, completion_tokens=55.1] validation: 90%|█████████ | 360/400 [00:51<00:08, 4.74it/s, reward=-1.43, num_turns=1.64, num_tools=0.644, failed=0.394, completion_tokens=56.4] validation: 90%|█████████ | 361/400 [00:51<00:08, 4.74it/s, reward=-1.43, num_turns=1.64, num_tools=0.643, failed=0.393, completion_tokens=58.2] validation: 90%|█████████ | 362/400 [00:51<00:08, 4.74it/s, reward=-1.44, num_turns=1.64, num_tools=0.641, failed=0.392, completion_tokens=59.9] validation: 91%|█████████ | 363/400 [00:51<00:02, 14.97it/s, reward=-1.44, num_turns=1.64, num_tools=0.641, failed=0.392, completion_tokens=59.9] validation: 91%|█████████ | 363/400 [00:51<00:02, 14.97it/s, reward=-1.44, num_turns=1.64, num_tools=0.642, failed=0.391, completion_tokens=59.8] validation: 91%|█████████ | 364/400 [00:51<00:02, 14.97it/s, reward=-1.43, num_turns=1.64, num_tools=0.643, failed=0.39, completion_tokens=59.6] validation: 91%|█████████▏| 365/400 [00:51<00:02, 14.97it/s, reward=-1.42, num_turns=1.64, num_tools=0.644, failed=0.389, completion_tokens=59.5] validation: 92%|█████████▏| 366/400 [00:51<00:02, 14.97it/s, reward=-1.42, num_turns=1.64, num_tools=0.645, failed=0.388, completion_tokens=59.4] validation: 92%|█████████▏| 367/400 [00:51<00:02, 14.97it/s, reward=-1.41, num_turns=1.65, num_tools=0.646, failed=0.387, completion_tokens=59.3] validation: 92%|█████████▏| 368/400 [00:51<00:02, 14.97it/s, reward=-1.4, num_turns=1.65, num_tools=0.647, failed=0.386, completion_tokens=59.1] validation: 92%|█████████▏| 369/400 [00:51<00:02, 14.97it/s, reward=-1.39, num_turns=1.65, num_tools=0.648, failed=0.385, completion_tokens=59] validation: 92%|█████████▎| 370/400 [00:51<00:02, 14.97it/s, reward=-1.4, num_turns=1.65, num_tools=0.649, failed=0.384, completion_tokens=58.9] validation: 93%|█████████▎| 371/400 [00:51<00:01, 14.97it/s, reward=-1.38, num_turns=1.65, num_tools=0.65, failed=0.383, completion_tokens=58.7] validation: 93%|█████████▎| 372/400 [00:51<00:01, 14.97it/s, reward=-1.37, num_turns=1.65, num_tools=0.651, failed=0.382, completion_tokens=58.6] validation: 93%|█████████▎| 373/400 [00:51<00:01, 14.97it/s, reward=-1.36, num_turns=1.65, num_tools=0.651, failed=0.381, completion_tokens=58.5] validation: 94%|█████████▎| 374/400 [00:51<00:01, 14.97it/s, reward=-1.35, num_turns=1.65, num_tools=0.652, failed=0.38, completion_tokens=58.3] validation: 94%|█████████▍| 375/400 [00:51<00:01, 14.97it/s, reward=-1.34, num_turns=1.65, num_tools=0.653, failed=0.379, completion_tokens=58.2] validation: 94%|█████████▍| 376/400 [00:51<00:01, 14.97it/s, reward=-1.33, num_turns=1.65, num_tools=0.654, failed=0.378, completion_tokens=58.1] validation: 94%|█████████▍| 377/400 [00:51<00:01, 14.97it/s, reward=-1.33, num_turns=1.66, num_tools=0.655, failed=0.377, completion_tokens=58] validation: 94%|█████████▍| 378/400 [00:51<00:01, 14.97it/s, reward=-1.31, num_turns=1.66, num_tools=0.656, failed=0.376, completion_tokens=57.9] validation: 95%|█████████▍| 379/400 [00:51<00:01, 14.97it/s, reward=-1.32, num_turns=1.66, num_tools=0.657, failed=0.375, completion_tokens=57.8] validation: 95%|█████████▌| 380/400 [00:51<00:01, 14.97it/s, reward=-1.3, num_turns=1.66, num_tools=0.658, failed=0.374, completion_tokens=57.7] validation: 95%|█████████▌| 381/400 [00:51<00:01, 14.97it/s, reward=-1.31, num_turns=1.66, num_tools=0.659, failed=0.373, completion_tokens=57.6] validation: 96%|█████████▌| 382/400 [00:51<00:01, 14.97it/s, reward=-1.31, num_turns=1.66, num_tools=0.66, failed=0.372, completion_tokens=57.5] validation: 96%|█████████▌| 383/400 [00:51<00:01, 14.97it/s, reward=-1.31, num_turns=1.66, num_tools=0.661, failed=0.371, completion_tokens=57.4] validation: 96%|█████████▌| 384/400 [00:51<00:01, 14.97it/s, reward=-1.3, num_turns=1.66, num_tools=0.661, failed=0.37, completion_tokens=57.2] validation: 96%|█████████▋| 385/400 [00:51<00:01, 14.97it/s, reward=-1.3, num_turns=1.66, num_tools=0.662, failed=0.369, completion_tokens=57.1] validation: 96%|█████████▋| 386/400 [00:51<00:00, 19.34it/s, reward=-1.3, num_turns=1.66, num_tools=0.662, failed=0.369, completion_tokens=57.1] validation: 96%|█████████▋| 386/400 [00:51<00:00, 19.34it/s, reward=-1.3, num_turns=1.66, num_tools=0.663, failed=0.368, completion_tokens=57.1] validation: 97%|█████████▋| 387/400 [00:51<00:00, 19.34it/s, reward=-1.3, num_turns=1.66, num_tools=0.664, failed=0.367, completion_tokens=57] validation: 97%|█████████▋| 388/400 [00:51<00:00, 19.34it/s, reward=-1.3, num_turns=1.66, num_tools=0.665, failed=0.366, completion_tokens=56.9] validation: 97%|█████████▋| 389/400 [00:51<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.666, failed=0.365, completion_tokens=56.8] validation: 98%|█████████▊| 390/400 [00:51<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.667, failed=0.364, completion_tokens=56.8] validation: 98%|█████████▊| 391/400 [00:51<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.668, failed=0.363, completion_tokens=56.7] validation: 98%|█████████▊| 392/400 [00:51<00:00, 19.34it/s, reward=-1.3, num_turns=1.67, num_tools=0.668, failed=0.362, completion_tokens=56.7] validation: 98%|█████████▊| 393/400 [00:51<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.672, failed=0.361, completion_tokens=56.8] validation: 98%|█████████▊| 394/400 [00:51<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.673, failed=0.36, completion_tokens=56.8] validation: 99%|█████████▉| 395/400 [00:52<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.673, failed=0.359, completion_tokens=56.8] validation: 99%|█████████▉| 396/400 [00:52<00:00, 19.34it/s, reward=-1.3, num_turns=1.67, num_tools=0.674, failed=0.359, completion_tokens=56.9] validation: 99%|█████████▉| 397/400 [00:52<00:00, 19.34it/s, reward=-1.3, num_turns=1.67, num_tools=0.675, failed=0.358, completion_tokens=57.1] validation: 100%|█████████▉| 398/400 [00:52<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.676, failed=0.357, completion_tokens=57.2] validation: 100%|█████████▉| 399/400 [00:52<00:00, 19.34it/s, reward=-1.29, num_turns=1.67, num_tools=0.677, failed=0.356, completion_tokens=57.7] validation: 100%|██████████| 400/400 [00:52<00:00, 19.34it/s, reward=-1.3, num_turns=1.68, num_tools=0.677, failed=0.355, completion_tokens=58.1] validation: 100%|██████████| 400/400 [00:52<00:00, 7.55it/s, reward=-1.3, num_turns=1.68, num_tools=0.677, failed=0.355, completion_tokens=58.1] + Val avg reward: -1.295 + +============================================================ +Step 6/50 +============================================================ + step 6: 0%| | 0/32 [00:00 exception=NotFoundError("Error code: 404 - {'error': {'message': 'The model `qwen-0.5b-tool-agent@8` does not exist.', 'type': 'NotFoundError', 'param': 'model', 'code': 404}}")> +Traceback (most recent call last): + File "/usr/lib/python3.12/asyncio/tasks.py", line 314, in __step_run_and_handle_result + result = coro.send(None) + ^^^^^^^^^^^^^^^ + File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 468, in _monitor_openai_server + raise e + File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 453, in _monitor_openai_server + await openai_client.completions.create( + File "/usr/local/lib/python3.12/dist-packages/openai/resources/completions.py", line 1109, in create + return await self._post( + ^^^^^^^^^^^^^^^^^ + File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1884, in post + return await self.request(cast_to, opts, stream=stream, stream_cls=stream_cls) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/usr/local/lib/python3.12/dist-packages/openai/_base_client.py", line 1669, in request + raise self._make_status_error_from_response(err.response) from None +openai.NotFoundError: Error code: 404 - {'error': {'message': 'The model `qwen-0.5b-tool-agent@8` does not exist.', 'type': 'NotFoundError', 'param': 'model', 'code': 404}} +Traceback (most recent call last): + File "", line 198, in _run_module_as_main + File "", line 88, in _run_code + File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 171, in + main() + File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 167, in main + asyncio.run(train(**kwargs)) + File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 30, in run + return loop.run_until_complete(task) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 98, in run_until_complete + return f.result() + ^^^^^^^^^^ + File "/usr/lib/python3.12/asyncio/futures.py", line 203, in result + raise self._exception.with_traceback(self._exception_tb) + File "/usr/lib/python3.12/asyncio/tasks.py", line 316, in __step_run_and_handle_result + result = coro.throw(exc) + ^^^^^^^^^^^^^^^ + File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 113, in train + result = await backend.train(model, train_groups, learning_rate=learning_rate) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 644, in train + async for metrics in self._train_model( + File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 783, in _train_model + async for result in service.train( + File "/usr/local/lib/python3.12/dist-packages/mp_actors/move.py", line 226, in async_gen_wrapper + send_value = yield await asyncio.wrap_future( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/usr/lib/python3.12/asyncio/futures.py", line 287, in __await__ + yield self # This tells Task to wait for completion. + ^^^^^^^^^^ + File "/usr/lib/python3.12/asyncio/tasks.py", line 385, in __wakeup + future.result() + File "/usr/lib/python3.12/asyncio/futures.py", line 203, in result + raise self._exception.with_traceback(self._exception_tb) +RuntimeError: Proxy is closing + train: 100%|██████████| 3/3 [00:32<00:00, 10.79s/it, loss/train=1.03, loss/grad_norm=0.954, loss/learning_rate=5e-5, loss/entropy=1] +Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0 Please see https://github.com/pytorch/ao/issues/2919 for more info +Loaded 200 train, 50 val scenarios +GRPO config: 4 scenarios/step × 8 rollouts/scenario = 32 rollouts/step +Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0 Please see https://github.com/pytorch/ao/issues/2919 for more info +/usr/local/lib/python3.12/dist-packages/art/__init__.py:37: UserWarning: WARNING: Unsloth should be imported before [transformers] to ensure all optimizations are applied. Your code may run slower or encounter memory issues without these optimizations. + +Please restructure your imports with 'import unsloth' at the top of your file. + import unsloth # noqa: F401 +🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning. +🦥 Unsloth Zoo will now patch everything to make training faster! +==((====))== Unsloth 2026.3.3: Fast Qwen2 patching. Transformers: 5.2.0. vLLM: 0.17.0+art1. + \\ /| NVIDIA A100-SXM4-80GB. Num GPUs = 1. Max memory: 79.252 GB. Platform: Linux. +O^O/ \_/ \ Torch: 2.10.0+cu128. CUDA: 8.0. CUDA Toolkit: 12.8. Triton: 3.6.0 +\ / Bfloat16 = TRUE. FA [Xformers = 0.0.35. FA2 = False] + "-____-" Free license: http://github.com/unslothai/unsloth +Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored! + Loading weights: 0%| | 0/290 [00:00. +Unsloth 2026.3.3 patched 24 layers with 24 QKV layers, 24 O layers and 24 MLP layers. +Warning: `huggingface-cli` is deprecated and no longer works. Use `hf` instead. + +Hint: `hf` is already installed! Use it directly. + +Hint: Examples: + hf auth login + hf download unsloth/gemma-4-31B-it-GGUF + hf upload my-cool-model . . + hf models ls --search "gemma" + hf repos ls --format json + hf jobs run python:3.12 python -c 'print("Hello!")' + hf --help + +INFO 04-13 02:36:48 [model.py:531] Resolved architecture: Qwen2ForCausalLM +INFO 04-13 02:36:48 [model.py:1554] Using max model len 32768 +INFO 04-13 02:36:48 [scheduler.py:231] Chunked prefill is enabled with max_num_batched_tokens=2048. +INFO 04-13 02:36:48 [vllm.py:747] Asynchronous scheduling is enabled. +WARNING 04-13 02:36:50 [system_utils.py:152] We must use the `spawn` multiprocessing start method. Overriding VLLM_WORKER_MULTIPROC_METHOD to 'spawn'. See https://docs.vllm.ai/en/latest/usage/troubleshooting.html#python-multiprocessing for more information. Reasons: CUDA is initialized +Skipping import of cpp extensions due to incompatible torch version 2.10.0+cu128 for torchao version 0.15.0 Please see https://github.com/pytorch/ao/issues/2919 for more info +/usr/local/lib/python3.12/dist-packages/art/__init__.py:37: UserWarning: WARNING: Unsloth should be imported before [transformers] to ensure all optimizations are applied. Your code may run slower or encounter memory issues without these optimizations. + +Please restructure your imports with 'import unsloth' at the top of your file. + import unsloth # noqa: F401 +🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning. +🦥 Unsloth Zoo will now patch everything to make training faster! +(EngineCore_DP0 pid=15589) INFO 04-13 02:37:15 [core.py:101] Initializing a V1 LLM engine (v0.17.0+art1) with config: model='Qwen/Qwen2.5-0.5B-Instruct', speculative_config=None, tokenizer='Qwen/Qwen2.5-0.5B-Instruct', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, tokenizer_revision=None, trust_remote_code=False, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=auto, tensor_parallel_size=1, pipeline_parallel_size=1, data_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=False, enable_return_routed_experts=False, kv_cache_dtype=auto, device_config=cuda, structured_outputs_config=StructuredOutputsConfig(backend='auto', disable_fallback=False, disable_any_whitespace=False, disable_additional_properties=False, reasoning_parser='', reasoning_parser_plugin='', enable_in_reasoning=False), observability_config=ObservabilityConfig(show_hidden_metrics_for_version=None, otlp_traces_endpoint=None, collect_detailed_traces=None, kv_cache_metrics=False, kv_cache_metrics_sample=0.01, cudagraph_metrics=False, enable_layerwise_nvtx_tracing=False, enable_mfu_metrics=False, enable_mm_processor_stats=False, enable_logging_iteration_details=False), seed=0, served_model_name=Qwen/Qwen2.5-0.5B-Instruct, enable_prefix_caching=True, enable_chunked_prefill=True, pooler_config=None, compilation_config={'level': None, 'mode': , 'debug_dump_path': None, 'cache_dir': '', 'compile_cache_save_format': 'binary', 'backend': 'inductor', 'custom_ops': ['none'], 'splitting_ops': ['vllm::unified_attention', 'vllm::unified_attention_with_output', 'vllm::unified_mla_attention', 'vllm::unified_mla_attention_with_output', 'vllm::mamba_mixer2', 'vllm::mamba_mixer', 'vllm::short_conv', 'vllm::linear_attention', 'vllm::plamo2_mamba_mixer', 'vllm::gdn_attention_core', 'vllm::kda_attention', 'vllm::sparse_attn_indexer', 'vllm::rocm_aiter_sparse_attn_indexer', 'vllm::unified_kv_cache_update', 'vllm::unified_mla_kv_cache_update'], 'compile_mm_encoder': False, 'compile_sizes': [], 'compile_ranges_split_points': [2048], 'inductor_compile_config': {'enable_auto_functionalized_v2': False, 'combo_kernels': True, 'benchmark_combo_kernel': True}, 'inductor_passes': {}, 'cudagraph_mode': , 'cudagraph_num_of_warmups': 1, 'cudagraph_capture_sizes': [1, 2, 4, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88, 96, 104, 112, 120, 128, 136, 144, 152, 160, 168, 176, 184, 192, 200, 208, 216, 224, 232, 240, 248, 256], 'cudagraph_copy_inputs': False, 'cudagraph_specialize_lora': True, 'use_inductor_graph_partition': False, 'pass_config': {'fuse_norm_quant': False, 'fuse_act_quant': False, 'fuse_attn_quant': False, 'enable_sp': False, 'fuse_gemm_comms': False, 'fuse_allreduce_rms': False}, 'max_cudagraph_capture_size': 256, 'dynamic_shapes_config': {'type': , 'evaluate_guards': False, 'assume_32_bit_indexing': False}, 'local_cache_dir': None, 'fast_moe_cold_start': True, 'static_all_moe_layers': []} +(EngineCore_DP0 pid=15589) INFO 04-13 02:37:15 [worker_base.py:283] Injected into for extended collective_rpc calls ['run', 'time'] +(EngineCore_DP0 pid=15589) INFO 04-13 02:37:15 [parallel_state.py:1393] world_size=1 rank=0 local_rank=0 distributed_init_method=tcp://172.21.0.2:42797 backend=nccl +(EngineCore_DP0 pid=15589) INFO 04-13 02:37:15 [parallel_state.py:1715] rank 0 in world size 1 is assigned as DP rank 0, PP rank 0, PCP rank 0, TP rank 0, EP rank N/A, EPLB rank N/A +(EngineCore_DP0 pid=15589) INFO 04-13 02:37:16 [base.py:106] Offloader set to NoopOffloader +(EngineCore_DP0 pid=15589) INFO 04-13 02:37:16 [gpu_model_runner.py:4255] Starting to load model Qwen/Qwen2.5-0.5B-Instruct... +(EngineCore_DP0 pid=15589) INFO 04-13 02:37:17 [cuda.py:405] Using FLASH_ATTN attention backend out of potential backends: ['FLASH_ATTN', 'FLASHINFER', 'TRITON_ATTN', 'FLEX_ATTENTION']. +(EngineCore_DP0 pid=15589) INFO 04-13 02:37:17 [flash_attn.py:587] Using FlashAttention version 2 +(EngineCore_DP0 pid=15589) :1297: FutureWarning: The cuda.cudart module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.runtime module instead. +(EngineCore_DP0 pid=15589) :1297: FutureWarning: The cuda.nvrtc module is deprecated and will be removed in a future release, please switch to use the cuda.bindings.nvrtc module instead. +(EngineCore_DP0 pid=15589) INFO 04-13 02:37:18 [weight_utils.py:601] No model.safetensors.index.json found in remote. +(EngineCore_DP0 pid=15589) Loading safetensors checkpoint shards: 0% Completed | 0/1 [00:00", line 198, in _run_module_as_main + File "", line 88, in _run_code + File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 211, in + main() + File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 207, in main + asyncio.run(train(**kwargs)) + File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 30, in run + return loop.run_until_complete(task) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/usr/local/lib/python3.12/dist-packages/nest_asyncio.py", line 98, in run_until_complete + return f.result() + ^^^^^^^^^^ + File "/usr/lib/python3.12/asyncio/futures.py", line 203, in result + raise self._exception.with_traceback(self._exception_tb) + File "/usr/lib/python3.12/asyncio/tasks.py", line 316, in __step_run_and_handle_result + result = coro.throw(exc) + ^^^^^^^^^^^^^^^ + File "/workspace/RL-Trained-Tool-Use-Agent/src/train.py", line 153, in train + result = await backend.train(model, train_groups, learning_rate=learning_rate) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 644, in train + async for metrics in self._train_model( + File "/usr/local/lib/python3.12/dist-packages/art/local/backend.py", line 783, in _train_model + async for result in service.train( + File "/usr/local/lib/python3.12/dist-packages/mp_actors/move.py", line 226, in async_gen_wrapper + send_value = yield await asyncio.wrap_future( + ^^^^^^^^^^^^^^^^^^^^^^^^^^ + File "/usr/lib/python3.12/asyncio/futures.py", line 287, in __await__ + yield self # This tells Task to wait for completion. + ^^^^^^^^^^ + File "/usr/lib/python3.12/asyncio/tasks.py", line 385, in __wakeup + future.result() + File "/usr/lib/python3.12/asyncio/futures.py", line 203, in result + raise self._exception.with_traceback(self._exception_tb) +RuntimeError: Proxy is closing + train: 0%| | 0/2 [01:06system\n' }} + {%- if messages[0]['role'] == 'system' %} + {{- messages[0]['content'] }} + {%- else %} + {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }} + {%- endif %} + {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within XML tags:\n" }} + {%- for tool in tools %} + {{- "\n" }} + {{- tool | tojson }} + {%- endfor %} + {{- "\n\n\nFor each function call, return a json object with function name and arguments within XML tags:\n\n{\"name\": , \"arguments\": }\n<|im_end|>\n" }} +{%- else %} + {%- if messages[0]['role'] == 'system' %} + {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }} + {%- else %} + {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }} + {%- endif %} +{%- endif %} +{%- for message in messages %} + {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %} + {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }} + {%- elif message.role == "assistant" %} + {{- '<|im_start|>' + message.role }} + {%- if message.content %} + {{- '\n' + message.content }} + {%- endif %} + {%- for tool_call in message.tool_calls %} + {%- if tool_call.function is defined %} + {%- set tool_call = tool_call.function %} + {%- endif %} + {{- '\n\n{"name": "' }} + {{- tool_call.name }} + {{- '", "arguments": ' }} + {{- tool_call.arguments | tojson }} + {{- '}\n' }} + {%- endfor %} + {{- '<|im_end|>\n' }} + {%- elif message.role == "tool" %} + {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %} + {{- '<|im_start|>user' }} + {%- endif %} + {{- '\n\n' }} + {{- message.content }} + {{- '\n' }} + {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %} + {{- '<|im_end|>\n' }} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- if add_generation_prompt %} + {{- '<|im_start|>assistant\n' }} +{%- endif %} diff --git a/config.json b/config.json new file mode 100644 index 0000000..e2f4dfb --- /dev/null +++ b/config.json @@ -0,0 +1,57 @@ +{ + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "dtype": "bfloat16", + "eos_token_id": 151645, + "hidden_act": "silu", + "hidden_size": 896, + "initializer_range": 0.02, + "intermediate_size": 4864, + "layer_types": [ + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention", + "full_attention" + ], + "max_position_embeddings": 32768, + "max_window_layers": 21, + "model_type": "qwen2", + "num_attention_heads": 14, + "num_hidden_layers": 24, + "num_key_value_heads": 2, + "pad_token_id": null, + "rms_norm_eps": 1e-06, + "rope_parameters": { + "rope_theta": 1000000.0, + "rope_type": "default" + }, + "sliding_window": null, + "tie_word_embeddings": true, + "transformers_version": "5.2.0", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 151936 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..8d23c88 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,14 @@ +{ + "bos_token_id": 151643, + "do_sample": true, + "eos_token_id": [ + 151645, + 151643 + ], + "pad_token_id": 151643, + "repetition_penalty": 1.1, + "temperature": 0.7, + "top_k": 20, + "top_p": 0.8, + "transformers_version": "5.2.0" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..9a10cc7 --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1ce74637a68f2305f02771cfbf5336782b145f388fae4f87b1ef1b1f84b08c6 +size 988097824 diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..5340d81 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd5948af71b4f56cf697f7580814c7ce8b80595ef985544efcacf716126a2e31 +size 11422356 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..26510ce --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,15 @@ +{ + "add_prefix_space": false, + "backend": "tokenizers", + "bos_token": null, + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "errors": "replace", + "is_local": true, + "model_max_length": 32768, + "pad_token": "<|PAD_TOKEN|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "Qwen2Tokenizer", + "unk_token": null +}