mode: train
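# Experiment-level data settings. ${oc.env:VAR,default} is OmegaConf
# environment-variable interpolation, so hf_token resolves to null
# when HF_TOKEN is unset.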
experiment:
  dataset_size: 6000
  dataset_seed: 1234
  test_size: 0.1
  hf_token: ${oc.env:HF_TOKEN,null}
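# Output paths. run_name is assembled from other config nodes via
# OmegaConf interpolation; ${ckpt2short:...} looks like a custom
# resolver (presumably registered by the training code) that shortens
# the resume-checkpoint path for the name.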
output:
  root_path: ${oc.env:ROOT_PATH}
  run_name: ${model.trim}_${task.name}_${algorithm.name}_${algorithm.training.curriculum_schedule}_${algorithm.training.scheduler_params.mu_exp}_${algorithm.training.scheduler_params.sigma}_SEC${algorithm.training.scheduler_params.vrex_adds.sec}DRO${algorithm.training.scheduler_params.vrex_adds.groupdro}G${algorithm.training.scheduler_params.vrex_adds.gaussian}_minp${algorithm.training.scheduler_params.min_prob}${ckpt2short:${algorithm.training.resume_from_checkpoint}}_${algorithm.training.max_steps}
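# PEFT-style LoRA adapter settings: rank 32, scaling alpha 64,
# adapters only on the attention q/v projections.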
lora:
  r: 32
  alpha: 64
  dropout: 0.1
  target_modules:
  - q_proj
  - v_proj
  task_type: CAUSAL_LM
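# Optional GPU memory pre-allocation (disabled here); presumably
# reserves ~50 GiB on cuda:0 when enabled.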
occupy_gpu_memory: false
occupy_gpu_memory_gb: 50
gpu_device: cuda:0
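# Base model: Qwen/Qwen2.5-1.5B-Instruct in bfloat16 with
# FlashAttention-2 (the Transformers attn_implementation option).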
model:
  family: Qwen
  trim: Qwen2.5-1.5B-Instruct
  name: ${model.family}/${model.trim}
  trust_remote_code: true
  torch_dtype: bfloat16
  attn_implementation: flash_attention_2
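# Countdown task data. The n2-n5 datasets form the training mix
# (presumably 2-5 operands, hence the task name countdown2345);
# the held-out n6 set tests generalization to harder instances.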
task:
  name: countdown2345
  data_files:
  - citrinegui/countdown_n2t100_1-100
  - citrinegui/countdown_n3t100_1-100
  - citrinegui/countdown_n4t100_1-100
  - citrinegui/countdown_n5t100_1-100
  test_file: citrinegui/countdown_n6t100_1-100
  force_redownload: false
  train_size: 327680
  test_size: 1024
  training:
    max_prompt_length: 1000
    max_completion_length: 512
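# Evaluation settings: greedy decoding (temperature 0.0) with a
# single completion per prompt (sc_num presumably counts
# self-consistency samples; pass_at_k is the k for pass@k).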
inference:
  checkpoint: outputs/Qwen2.5-1.5B-Instruct_countdown2345_grpo_balanced_0.5_0.5_SEC0.3DRO1.0G0.0_minpTrue_1600/checkpoint-1600/
  temperature: 0.0
  sc_num: 1
  pass_at_k: 1
  resume: 0
  max_new_tokens: 512
  batch_size: 32
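# GRPO training. The field names below largely mirror TRL's
# GRPOConfig (num_generations, beta, use_vllm, vllm_mode, ...).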
algorithm:
  name: grpo
  training:
    resume_from_checkpoint: null
    learning_rate: 1.0e-06
    lr_scheduler_type: cosine
    logging_steps: 10
    max_steps: 1600
    per_device_train_batch_size: 16
    generation_batch_size: null
    steps_per_generation: 1
    gradient_accumulation_steps: 4
    gradient_checkpointing: true
    bf16: true
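    # Effective batch per optimizer step: 16 per device x 4
    # gradient-accumulation steps = 64 sequences per device.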
    report_to:
    - wandb
    push_to_hub: true
    save_strategy: steps
    save_steps: ${algorithm.training.max_steps}
    tf32: true
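    # GRPO sampling: 8 completions per prompt, KL coefficient
    # beta = 0.001; generation runs on a colocated vLLM engine capped
    # at 30% of GPU memory. save_steps above resolves to max_steps,
    # so only the final checkpoint (step 1600) is written.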
    num_generations: 8
    beta: 0.001
    use_vllm: true
    vllm_mode: colocate
    vllm_gpu_memory_utilization: 0.3
    vllm_server_port: 8000
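    # Curriculum is disabled (curriculum: false), but the gaussian
    # schedule and the scheduler_params below are still interpolated
    # into output.run_name.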
    curriculum: false
    curriculum_schedule: gaussian
    scheduler_params:
      mu_exp: 0.5
      sigma: 0.5
      vrex_adds:
        groupdro: 1.0
        gaussian: 0.0
        sec: 0.3
        beta: 1.0
      min_prob: true
      td_alpha: 0.5
      sec_temperature: 0.3
    max_dapo_iter: 2
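# Note: a config like this is typically loaded with OmegaConf/Hydra;
# the custom ckpt2short resolver must be registered before run_name
# is resolved (an assumption about the accompanying training code).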