289 lines
6.2 KiB
YAML
289 lines
6.2 KiB
YAML
|
|
finetune:
|
||
|
|
data: null
|
||
|
|
model_class: causal-language-modeling
|
||
|
|
config_name: ${..model_path}
|
||
|
|
optim: adamw_torch
|
||
|
|
load_as_bf16: true
|
||
|
|
fp32_lm_head: ${..fp32_lm_head}
|
||
|
|
fp32_layer_prefix: ${..fp32_layer_prefix}
|
||
|
|
use_flash_attention: true
|
||
|
|
attn_implementation: flash_attention_2
|
||
|
|
auto_device_map: false
|
||
|
|
lora:
|
||
|
|
enabled: false
|
||
|
|
task_type: CAUSAL_LM
|
||
|
|
base_model_8bit: false
|
||
|
|
base_model_4bit: false
|
||
|
|
r: 16
|
||
|
|
alpha: 16
|
||
|
|
dropout: 0.05
|
||
|
|
bias: none
|
||
|
|
target_modules: []
|
||
|
|
force_restart: ${..force_restart}
|
||
|
|
resume_dataloader: false
|
||
|
|
train_batch_size: 2
|
||
|
|
valid_batch_size: 4
|
||
|
|
weight_decay: 0.01
|
||
|
|
learning_rate: 1.0e-06
|
||
|
|
gradient_clipping_threshold: 0.3
|
||
|
|
lr_scheduler_type: cosine
|
||
|
|
num_warmup_steps: 25
|
||
|
|
gradient_accumulation_passes: 128
|
||
|
|
gradient_checkpointing: true
|
||
|
|
reentrant_checkpointing: false
|
||
|
|
max_train_steps: 1500
|
||
|
|
interrupt_train_steps: -1
|
||
|
|
max_eval_steps: -1
|
||
|
|
seq_length: 8192
|
||
|
|
seq_packing: true
|
||
|
|
output_dir: ${..output_dir}/finetune
|
||
|
|
seed: ${..seed}
|
||
|
|
save_checkpoint_steps: 100
|
||
|
|
keep_intermediate_checkpoints: true
|
||
|
|
trust_remote_code: false
|
||
|
|
cuda_empty_cache: true
|
||
|
|
sft_config_name: null
|
||
|
|
n_examples: 0
|
||
|
|
log_each_n_steps: 1
|
||
|
|
also_save_steps: []
|
||
|
|
use_safetensors: true
|
||
|
|
save_final_training_state: true
|
||
|
|
seq_parallel: 1
|
||
|
|
objective: rl
|
||
|
|
input: training_data
|
||
|
|
send_weight_updates: true
|
||
|
|
queue_size: 32
|
||
|
|
max_lag: null
|
||
|
|
weight_update_interval: 1
|
||
|
|
pop_old_data: ${..pop_old_data}
|
||
|
|
attempts: 8
|
||
|
|
eval_callback:
|
||
|
|
_target_: pipelinerl.finetune.utils.dummy_eval_callback
|
||
|
|
config_name: ''
|
||
|
|
rl:
|
||
|
|
policy_loss: ppo
|
||
|
|
divide_advantage_by_std: false
|
||
|
|
kl_coef: 0.0
|
||
|
|
final_kl_coef: 0.0
|
||
|
|
entropy_bonus: 0.0
|
||
|
|
reward_minus_kl_coef: 0.0
|
||
|
|
epsilon_low: 0.2
|
||
|
|
epsilon_high: 0.2
|
||
|
|
use_advantages: true
|
||
|
|
relu_log_p_weights: false
|
||
|
|
clamp_log_ratio_ref_new_value: 5
|
||
|
|
temperature: ${...llm.parameters.temperature}
|
||
|
|
aggregate_loss: sum
|
||
|
|
overlong_filtering: false
|
||
|
|
adv_estimator: remax
|
||
|
|
gamma: 1.0
|
||
|
|
filter_zero_advantage_groups: false
|
||
|
|
rewards:
|
||
|
|
correct_answer_finished: 1.0
|
||
|
|
correct_answer_not_finished: 1.0
|
||
|
|
wrong_answer_finished: 0
|
||
|
|
wrong_answer_not_finished: 0
|
||
|
|
no_answer_finished: 0
|
||
|
|
no_answer_not_finished: 0
|
||
|
|
unparsable_finished: 0
|
||
|
|
unparsable_not_finished: 0
|
||
|
|
streams:
|
||
|
|
backend: files
|
||
|
|
seed: 42
|
||
|
|
fp32_lm_head: false
|
||
|
|
fp32_layer_prefix: lm_head
|
||
|
|
actor:
|
||
|
|
log_each_n_secs: 0
|
||
|
|
llm_max_rollouts: 256
|
||
|
|
rollout_workers: 1
|
||
|
|
discount_factor: 1
|
||
|
|
problem_queue_size: 256
|
||
|
|
result_queue_size: 256
|
||
|
|
throughput_window_size: 50
|
||
|
|
shared_memory_entry_size: 10000000
|
||
|
|
rollout_policy: pipelinerl.domains.math.generate_math_rollout
|
||
|
|
system_prompt: Please reason step by step, and put your final answer within \boxed{}.
|
||
|
|
task_template: '{task}'
|
||
|
|
task_prompt: ''
|
||
|
|
environment: null
|
||
|
|
preprocess:
|
||
|
|
input: actor
|
||
|
|
output: training_data
|
||
|
|
n_workers: 8
|
||
|
|
chunk_n_groups: 2
|
||
|
|
raw_queue_size: 8
|
||
|
|
input_queue_size: 32
|
||
|
|
output_queue_size: 32
|
||
|
|
dataset_buffer_size: 0
|
||
|
|
ring_buffer_size: 128
|
||
|
|
max_ready_samples_per_lead: 64
|
||
|
|
pop_old_data: ${..pop_old_data}
|
||
|
|
shared_memory_entry_size: 100000000
|
||
|
|
log_every_n_samples: 128
|
||
|
|
llm:
|
||
|
|
parameters:
|
||
|
|
max_tokens: 4096
|
||
|
|
temperature: 1.0
|
||
|
|
test_llm:
|
||
|
|
parameters:
|
||
|
|
max_tokens: 4096
|
||
|
|
temperature: 1.0
|
||
|
|
top_p: 0.95
|
||
|
|
top_k: 50
|
||
|
|
vllm_config:
|
||
|
|
use_v1: false
|
||
|
|
quantization: null
|
||
|
|
vllm_kwargs:
|
||
|
|
dtype: bfloat16
|
||
|
|
gpu-memory-utilization: 0.92
|
||
|
|
max-num-seqs: 64
|
||
|
|
max-num-batched-tokens: 16384
|
||
|
|
enable-chunked-prefill: ''
|
||
|
|
return-tokens-as-token-ids: ''
|
||
|
|
tensor-parallel-size: 1
|
||
|
|
pipeline-parallel-size: 1
|
||
|
|
generation-config: vllm
|
||
|
|
max_model_len: 8192
|
||
|
|
num-scheduler-steps: 8
|
||
|
|
disable-log-requests: ''
|
||
|
|
disable-frontend-multiprocessing: ''
|
||
|
|
world:
|
||
|
|
replicas: 1
|
||
|
|
actor_fraction: 6
|
||
|
|
preprocessor_fraction: 0
|
||
|
|
finetune_fraction: 2
|
||
|
|
env_replicas: 1
|
||
|
|
actor_group_port: 9000
|
||
|
|
environment_start_port: 7777
|
||
|
|
jobs:
|
||
|
|
- kind: actor_llm
|
||
|
|
idx: 0
|
||
|
|
replica_idx: 0
|
||
|
|
local_idx: 0
|
||
|
|
node_rank: 0
|
||
|
|
hostname: localhost
|
||
|
|
port: 8080
|
||
|
|
gpus:
|
||
|
|
- 0
|
||
|
|
url: http://localhost:8080
|
||
|
|
environment_key: null
|
||
|
|
environment_index: null
|
||
|
|
- kind: actor_llm
|
||
|
|
idx: 1
|
||
|
|
replica_idx: 1
|
||
|
|
local_idx: 1
|
||
|
|
node_rank: 0
|
||
|
|
hostname: localhost
|
||
|
|
port: 8081
|
||
|
|
gpus:
|
||
|
|
- 1
|
||
|
|
url: http://localhost:8081
|
||
|
|
environment_key: null
|
||
|
|
environment_index: null
|
||
|
|
- kind: actor_llm
|
||
|
|
idx: 2
|
||
|
|
replica_idx: 2
|
||
|
|
local_idx: 2
|
||
|
|
node_rank: 0
|
||
|
|
hostname: localhost
|
||
|
|
port: 8082
|
||
|
|
gpus:
|
||
|
|
- 2
|
||
|
|
url: http://localhost:8082
|
||
|
|
environment_key: null
|
||
|
|
environment_index: null
|
||
|
|
- kind: actor
|
||
|
|
idx: 3
|
||
|
|
replica_idx: 0
|
||
|
|
local_idx: 0
|
||
|
|
node_rank: 0
|
||
|
|
hostname: localhost
|
||
|
|
port: null
|
||
|
|
gpus: []
|
||
|
|
url: ''
|
||
|
|
environment_key: null
|
||
|
|
environment_index: null
|
||
|
|
- kind: preprocessor
|
||
|
|
idx: 4
|
||
|
|
replica_idx: 0
|
||
|
|
local_idx: 0
|
||
|
|
node_rank: 0
|
||
|
|
hostname: localhost
|
||
|
|
port: null
|
||
|
|
gpus: []
|
||
|
|
url: ''
|
||
|
|
environment_key: null
|
||
|
|
environment_index: null
|
||
|
|
- kind: environment
|
||
|
|
idx: 5
|
||
|
|
replica_idx: 0
|
||
|
|
local_idx: 0
|
||
|
|
node_rank: 0
|
||
|
|
hostname: localhost
|
||
|
|
port: 7777
|
||
|
|
gpus: []
|
||
|
|
url: ''
|
||
|
|
environment_key: math
|
||
|
|
environment_index: 0
|
||
|
|
- kind: finetune
|
||
|
|
idx: 6
|
||
|
|
replica_idx: 0
|
||
|
|
local_idx: 0
|
||
|
|
node_rank: 0
|
||
|
|
hostname: localhost
|
||
|
|
port: null
|
||
|
|
gpus:
|
||
|
|
- 3
|
||
|
|
url: ''
|
||
|
|
environment_key: null
|
||
|
|
environment_index: null
|
||
|
|
eval_every_n_versions: 78000
|
||
|
|
model_path: Qwen/Qwen2.5-3B
|
||
|
|
accelerate_config: null
|
||
|
|
use_deepspeed: true
|
||
|
|
deepspeed_config: deepspeed_stage3_bf16
|
||
|
|
use_fsdp: false
|
||
|
|
fsdp:
|
||
|
|
param_dtype: fp32
|
||
|
|
reduce_dtype: fp32
|
||
|
|
buffer_dtype: fp32
|
||
|
|
output_dir: results/qwen2.5_3b_remax_3a1f_4xh100_214753
|
||
|
|
force_restart: false
|
||
|
|
pop_old_data: true
|
||
|
|
max_lag: null
|
||
|
|
attempts: 16
|
||
|
|
train_subset: null
|
||
|
|
debug:
|
||
|
|
mode: ''
|
||
|
|
streams_from: null
|
||
|
|
place_inference_workers: true
|
||
|
|
use_existing_llms: false
|
||
|
|
me:
|
||
|
|
job_idx: null
|
||
|
|
wandb:
|
||
|
|
use_wandb: true
|
||
|
|
fail_on_init_error: false
|
||
|
|
init_timeout: 120
|
||
|
|
wandb_id: null
|
||
|
|
wandb_name: null
|
||
|
|
wandb_entity_name: jaygala24-team
|
||
|
|
wandb_project_name: rl-post-training
|
||
|
|
wandb_resume: always
|
||
|
|
wandb_use_basename: true
|
||
|
|
wandb_workspace_root: results
|
||
|
|
wandb_group: qwen2.5_3b_remax_3a1f_4xh100_214753
|
||
|
|
wandb_dir: null
|
||
|
|
tags: []
|
||
|
|
environments:
|
||
|
|
- key: math
|
||
|
|
mode: remote
|
||
|
|
_target_: pipelinerl.domains.math.MathEnvironment
|
||
|
|
environment_key: math
|
||
|
|
dataset_loader: pipelinerl.domains.math.load_datasets
|
||
|
|
train_dataset_names:
|
||
|
|
- gsm8k_train
|
||
|
|
- math_train
|
||
|
|
test_dataset_names:
|
||
|
|
- gsm8k_test
|
||
|
|
- math_500
|