{ "artifact_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo-run2", "env_url": "https://heavycoderhh-counsel-env.hf.space", "evidence_pressure": 1.4, "grpo_dataset_size": 160, "grpo_learning_rate": 5e-06, "grpo_max_steps": 250, "max_completion_length": 320, "metrics": { "total_flos": 0.0, "train_loss": -0.00972448107972741, "train_runtime": 4103.8977, "train_samples_per_second": 0.244, "train_steps_per_second": 0.061 }, "model": "heavycoderhh/counsel-env-qwen3-0.6b-grpo", "num_generations": 4, "sft_dataset_size": 320, "sft_dir": "/tmp/counsel-sft-grpo-output/sft_warm_start", "sft_epochs": 1.0, "sft_learning_rate": 1e-05, "space_repo": "heavycoderhh/counsel-env" }