{ "model_name_or_path": "Qwen/Qwen3-8B", "ref_model_name_or_path": "Qwen/Qwen3-8B", "train_dataset_path": "../data/SOCIALIQA/train_dpo.jsonl", "eval_dataset_path": "", "split_mode": "train_only(val_ratio=0.0500)", "val_ratio": 0.05, "learning_rate": 5e-07, "beta": 0.1, "epochs": 1.0, "warmup_steps": 150, "max_length": 512, "max_prompt_length": 384, "global_batch_size": 16, "per_device_train_batch_size": 1, "gradient_accumulation_steps": 16, "optimizer": "rmsprop", "freeze": { "stack_name": "model.layers", "num_layers": 36, "selected_layer_indices_0based": [ 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35 ], "selected_layer_indices_1based": [ 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36 ], "final_norm_modules": [], "lm_head_enabled": false, "trainable_params": 5402500096, "total_params": 8190735360, "trainable_ratio": 0.6595866986966112 }, "num_train_examples": 31740, "num_eval_examples": 1670, "precompute_ref_log_probs": true, "precompute_ref_batch_size": 1, "use_logits_to_keep": true, "torch_empty_cache_steps": 0 }