---
# Full-parameter SFT run configuration (stage: sft, finetuning_type: full)
# for the Qwen3-32B snapshot referenced by model_name_or_path, trained on the
# preprocessed DCAgent trace dataset referenced by dataset.
# NOTE(review): key names look like a LLaMA-Factory-style trainer config
# (deepspeed path is under sft/lf_configs/) - confirm against the launcher.
#
# One key per line: the previous single-line form was not parseable YAML,
# because a plain scalar may not contain ": ".
adam_beta2: 0.98
assistant_tag: assistant
attn: fa2
bf16: true
content_tag: content
cutoff_len: 32768
dataloader_num_workers: 4
dataloader_persistent_workers: true
dataloader_pin_memory: true
dataset: /e/scratch/jureap59/raoof1/sft_data/hf_hub/datasets--DCAgent--g1_min_episodes_e1_gpt_long_top8_glm47_traces/snapshots/9828cc7d5cb31c19ed7e6dead76bd24dc2d66262_thinking_preprocessed
dataset_dir: ONLINE
datasets_cache_dir: /e/scratch/jureap59/raoof1/sft_data/arrow_cache
ddp_timeout: 180000000
deepspeed: sft/lf_configs/deepspeed/ds_z3_accelerate.json
do_train: true
enable_liger_kernel: true
finetuning_type: full
formatting: sharegpt
gradient_accumulation_steps: 1
gradient_checkpointing: true
hub_model_id: DCAgent/g1_gptlong_top8_32b
include_mfu: true
learning_rate: 4.0e-05
load_best_model_at_end: false
logging_steps: 5
logging_strategy: steps
lr_scheduler_type: cosine
# NOTE(review): 0.001 is an unusually small value for a gradient-norm
# threshold - kept as-is; confirm it is intentional.
max_grad_norm: 0.001
messages: conversations
model_name_or_path: /e/scratch/jureap59/raoof1/sft_data/hf_hub/models--Qwen--Qwen3-32B/snapshots/9216db5781bf21249d130ec9da846c4624c16137
num_train_epochs: 5.0
optim: adamw_torch_fused
output_dir: /e/scratch/jureap59/raoof1/sft_data/checkpoints/sft_g1_gptlong_top8_32b__Qwen3-32B
overwrite_cache: true
per_device_train_batch_size: 1
plot_loss: true
preprocessing_num_workers: 16
pure_bf16: false
push_to_hub: false
role_tag: role
# Suffix aligned with model_name_or_path and output_dir (both Qwen3-32B);
# the previous value ended in "Qwen3-8B", which did not match the model
# actually being trained.
run_name: g1_gptlong_top8_32b__Qwen3-32B
save_steps: 300
save_strategy: steps
save_total_limit: 1
seed: 42
stage: sft
template: qwen3
trust_remote_code: true
user_tag: user
warmup_ratio: 0.1
weight_decay: 0.04
disable_shuffling: true