# training_config.yaml — Qwen2.5-1.5B ReMax math RL run (pipelinerl).
# Original path: Qwen2.5-1.5B-ReMax-math-rea…/training_config.yaml (289 lines, 6.2 KiB).
# NOTE(review): web-viewer chrome ("Files", "Raw Normal View History", size/line
# counts) converted to this comment header so the file parses as YAML.
---
# Finetuning (RL objective) worker configuration.
# NOTE(review): indentation was stripped in the pasted file, flattening every
# key to top level. Nesting restored here; the relative interpolations pin the
# depth: `${..model_path}` requires `finetune` to sit directly under the root,
# and `${...llm.parameters.temperature}` requires `rl` to sit at finetune.rl.
finetune:
  data: null
  model_class: causal-language-modeling
  config_name: ${..model_path}  # resolves to the top-level model_path
  optim: adamw_torch
  load_as_bf16: true
  fp32_lm_head: ${..fp32_lm_head}
  fp32_layer_prefix: ${..fp32_layer_prefix}
  use_flash_attention: true
  attn_implementation: flash_attention_2
  auto_device_map: false
  lora:
    enabled: false
    task_type: CAUSAL_LM
    base_model_8bit: false
    base_model_4bit: false
    r: 16
    alpha: 16
    dropout: 0.05
    bias: 'none'  # quoted: this is the PEFT string option "none", not YAML null
    target_modules: []
  force_restart: ${..force_restart}
  resume_dataloader: false
  train_batch_size: 4
  valid_batch_size: 4
  weight_decay: 0.01
  learning_rate: 1.0e-06
  gradient_clipping_threshold: 0.3
  lr_scheduler_type: cosine
  num_warmup_steps: 25
  gradient_accumulation_passes: 64
  gradient_checkpointing: true
  reentrant_checkpointing: false
  max_train_steps: 1500
  interrupt_train_steps: -1
  max_eval_steps: -1
  seq_length: 8192
  seq_packing: true
  output_dir: ${..output_dir}/finetune
  seed: ${..seed}
  save_checkpoint_steps: 100
  keep_intermediate_checkpoints: true
  trust_remote_code: false
  cuda_empty_cache: true
  sft_config_name: null
  n_examples: 0
  log_each_n_steps: 1
  also_save_steps: []
  use_safetensors: true
  save_final_training_state: true
  seq_parallel: 1
  objective: rl
  input: training_data  # stream name produced by the preprocess section
  send_weight_updates: true
  queue_size: 32
  max_lag: null
  weight_update_interval: 1
  pop_old_data: ${..pop_old_data}
  attempts: 8
  eval_callback:
    _target_: pipelinerl.finetune.utils.dummy_eval_callback
    config_name: ''
  rl:
    policy_loss: ppo
    divide_advantage_by_std: false
    kl_coef: 0.0
    final_kl_coef: 0.0
    entropy_bonus: 0.0
    reward_minus_kl_coef: 0.0
    epsilon_low: 0.2
    epsilon_high: 0.2
    use_advantages: true
    relu_log_p_weights: false
    clamp_log_ratio_ref_new_value: 5
    temperature: ${...llm.parameters.temperature}  # three levels up -> root.llm
    aggregate_loss: sum
    overlong_filtering: false
    adv_estimator: remax
    gamma: 1.0
    filter_zero_advantage_groups: false

# Reward table used when scoring rollouts.
# NOTE(review): restored as a top-level key (consumed on the rollout/actor
# side, not by the finetune worker) — confirm against the pipelinerl base config.
rewards:
  correct_answer_finished: 1.0
  correct_answer_not_finished: 1.0
  wrong_answer_finished: 0
  wrong_answer_not_finished: 0
  no_answer_finished: 0
  no_answer_not_finished: 0
  unparsable_finished: 0
  unparsable_not_finished: 0
# Inter-process data streams backend.
# NOTE(review): nesting restored — `backend` belongs under `streams`; the three
# scalars below are top-level (they are the targets of `${..seed}`,
# `${..fp32_lm_head}` and `${..fp32_layer_prefix}` in the finetune section).
streams:
  backend: files

seed: 42
fp32_lm_head: false
fp32_layer_prefix: lm_head
# Rollout-generation (actor) worker configuration.
# NOTE(review): indentation restored; all keys below belong under `actor`.
actor:
  log_each_n_secs: 0
  llm_max_rollouts: 256
  rollout_workers: 1
  discount_factor: 1
  problem_queue_size: 256
  result_queue_size: 256
  throughput_window_size: 50
  shared_memory_entry_size: 10000000
  rollout_policy: pipelinerl.domains.math.generate_math_rollout
  # Quoted so the backslash and braces in \boxed{} stay literal.
  system_prompt: 'Please reason step by step, and put your final answer within \boxed{}.'
  task_template: '{task}'
  task_prompt: ''
  environment: null
# Preprocessing stage: turns actor rollouts into training data for finetune.
# NOTE(review): indentation restored; `${..pop_old_data}` pins this mapping
# directly under the root.
preprocess:
  input: actor            # consumes the actor's output stream
  output: training_data   # produces the stream finetune reads
  n_workers: 8
  chunk_n_groups: 2
  raw_queue_size: 8
  input_queue_size: 32
  output_queue_size: 32
  dataset_buffer_size: 0
  ring_buffer_size: 128
  max_ready_samples_per_lead: 64
  pop_old_data: ${..pop_old_data}
  shared_memory_entry_size: 100000000
  log_every_n_samples: 128
# Sampling parameters for training-time generation; `llm.parameters.temperature`
# is referenced by finetune.rl via `${...llm.parameters.temperature}`.
# NOTE(review): indentation restored.
llm:
  parameters:
    max_tokens: 4096
    temperature: 1.0

# Sampling parameters for evaluation-time generation.
test_llm:
  parameters:
    max_tokens: 4096
    temperature: 1.0
    top_p: 0.95
    top_k: 50
# vLLM server configuration. Keys inside vllm_kwargs mirror vLLM CLI argument
# names (hence the kebab-case); an empty-string value means the argument is
# passed as a bare flag with no value.
# NOTE(review): indentation restored.
vllm_config:
  use_v1: false
  quantization: null
  vllm_kwargs:
    dtype: bfloat16
    gpu-memory-utilization: 0.92
    max-num-seqs: 64
    max-num-batched-tokens: 16384
    enable-chunked-prefill: ''
    return-tokens-as-token-ids: ''
    tensor-parallel-size: 1
    pipeline-parallel-size: 1
    generation-config: vllm
    max_model_len: 8192
    num-scheduler-steps: 8
    disable-log-requests: ''
    disable-frontend-multiprocessing: ''
# Cluster layout: 4 GPUs total — 3 actor LLM servers (GPUs 0-2) and 1 finetune
# worker (GPU 3), matching actor_fraction: 3 / finetune_fraction: 1. The actor,
# preprocessor and environment jobs are CPU-only (gpus: []).
# NOTE(review): indentation restored; `jobs` is a sequence of job mappings.
world:
  replicas: 1
  actor_fraction: 3
  preprocessor_fraction: 0
  finetune_fraction: 1
  env_replicas: 1
  actor_group_port: 9000
  environment_start_port: 7777
  jobs:
    - kind: actor_llm
      idx: 0
      replica_idx: 0
      local_idx: 0
      node_rank: 0
      hostname: localhost
      port: 8080
      gpus:
        - 0
      url: http://localhost:8080
      environment_key: null
      environment_index: null
    - kind: actor_llm
      idx: 1
      replica_idx: 1
      local_idx: 1
      node_rank: 0
      hostname: localhost
      port: 8081
      gpus:
        - 1
      url: http://localhost:8081
      environment_key: null
      environment_index: null
    - kind: actor_llm
      idx: 2
      replica_idx: 2
      local_idx: 2
      node_rank: 0
      hostname: localhost
      port: 8082
      gpus:
        - 2
      url: http://localhost:8082
      environment_key: null
      environment_index: null
    - kind: actor
      idx: 3
      replica_idx: 0
      local_idx: 0
      node_rank: 0
      hostname: localhost
      port: null
      gpus: []
      url: ''
      environment_key: null
      environment_index: null
    - kind: preprocessor
      idx: 4
      replica_idx: 0
      local_idx: 0
      node_rank: 0
      hostname: localhost
      port: null
      gpus: []
      url: ''
      environment_key: null
      environment_index: null
    - kind: environment
      idx: 5
      replica_idx: 0
      local_idx: 0
      node_rank: 0
      hostname: localhost
      port: 7777
      gpus: []
      url: ''
      environment_key: math
      environment_index: 0
    - kind: finetune
      idx: 6
      replica_idx: 0
      local_idx: 0
      node_rank: 0
      hostname: localhost
      port: null
      gpus:
        - 3
      url: ''
      environment_key: null
      environment_index: null
# Top-level run settings. These are the interpolation targets for the finetune
# section (`${..output_dir}`, `${..force_restart}`, `${..pop_old_data}`).
# Note finetune overrides max_lag (null) and attempts (8) locally.
eval_every_n_versions: 78000
model_path: Qwen/Qwen2.5-1.5B
accelerate_config: null
use_deepspeed: true
deepspeed_config: deepspeed_stage3_bf16
use_fsdp: false
fsdp:
  param_dtype: fp32
  reduce_dtype: fp32
  buffer_dtype: fp32
output_dir: results/qwen2.5_1.5b_remax_3a1f_4xh100_214754
force_restart: false
pop_old_data: true
max_lag: null
attempts: 16
train_subset: null
# Debugging toggles (all at defaults for this run).
# NOTE(review): indentation restored.
debug:
  mode: ''
  streams_from: null
  place_inference_workers: true
  use_existing_llms: false

# Per-process identity; job_idx is filled in at launch time for each worker.
me:
  job_idx: null
# Weights & Biases experiment tracking.
# NOTE(review): indentation restored.
wandb:
  use_wandb: true
  fail_on_init_error: false
  init_timeout: 120
  wandb_id: null
  wandb_name: null
  wandb_entity_name: jaygala24-team
  wandb_project_name: rl-post-training
  # Quoted: "always" is a W&B resume-mode string, kept unambiguous.
  wandb_resume: 'always'
  wandb_use_basename: true
  wandb_workspace_root: results
  wandb_group: qwen2.5_1.5b_remax_3a1f_4xh100_214754
  wandb_dir: null
  tags: []
# Task environments: a single remote math environment with GSM8K + MATH
# train/test splits.
# NOTE(review): indentation restored; `environments` is a sequence of one
# environment mapping.
environments:
  - key: math
    mode: remote
    _target_: pipelinerl.domains.math.MathEnvironment
    environment_key: math
    dataset_loader: pipelinerl.domains.math.load_datasets
    train_dataset_names:
      - gsm8k_train
      - math_train
    test_dataset_names:
      - gsm8k_test
      - math_500