{ "artifact_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "dataset_size": 256, "env_url": "https://heavycoderhh-counsel-env.hf.space", "max_completion_length": 512, "max_steps": 200, "metrics": { "total_flos": 0.0, "train_loss": -0.0162161529250443, "train_runtime": 4111.2914, "train_samples_per_second": 0.195, "train_steps_per_second": 0.049 }, "model": "Qwen/Qwen3-0.6B", "num_generations": 4, "space_repo": "heavycoderhh/counsel-env", "use_vllm": false }