初始化项目,由ModelHub XC社区提供模型
Model: heavycoderhh/counsel-env-qwen3-0.6b-grpo Source: Original Platform
This commit is contained in:
18
training_summary.json
Normal file
18
training_summary.json
Normal file
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"artifact_repo": "heavycoderhh/counsel-env-qwen3-0.6b-grpo",
|
||||
"dataset_size": 256,
|
||||
"env_url": "https://heavycoderhh-counsel-env.hf.space",
|
||||
"max_completion_length": 512,
|
||||
"max_steps": 200,
|
||||
"metrics": {
|
||||
"total_flos": 0.0,
|
||||
"train_loss": -0.0162161529250443,
|
||||
"train_runtime": 4111.2914,
|
||||
"train_samples_per_second": 0.195,
|
||||
"train_steps_per_second": 0.049
|
||||
},
|
||||
"model": "Qwen/Qwen3-0.6B",
|
||||
"num_generations": 4,
|
||||
"space_repo": "heavycoderhh/counsel-env",
|
||||
"use_vllm": false
|
||||
}
|
||||
Reference in New Issue
Block a user