Initial commit: project initialization; model provided by the ModelHub XC community.
Model: jaygala24/Qwen2.5-1.5B-RLOO-math-reasoning — Source: Original Platform
This commit adds training_config.yaml (new file, 287 lines).
# training_config.yaml — reconstructed from a garbled diff-viewer extract
# (noise lines removed; original indentation was lost in extraction).
# NOTE(review): nesting below is inferred from the OmegaConf relative
# interpolations (`${..model_path}` inside `finetune` -> root `model_path`;
# `${...llm.parameters.temperature}` inside `finetune.rl` -> root
# `llm.parameters.temperature`) and pipelinerl conventions. The placement of
# `rewards:` and `environment:` at top level is an assumption — verify
# against the upstream pipelinerl base config.
---
finetune:
  data: null
  model_class: causal-language-modeling
  config_name: ${..model_path}
  optim: adamw_torch
  load_as_bf16: true
  fp32_lm_head: ${..fp32_lm_head}
  fp32_layer_prefix: ${..fp32_layer_prefix}
  use_flash_attention: true
  attn_implementation: flash_attention_2
  auto_device_map: false
  lora:
    enabled: false
    task_type: CAUSAL_LM
    base_model_8bit: false
    base_model_4bit: false
    r: 16
    alpha: 16
    dropout: 0.05
    # "none" is a string here (PEFT bias mode), not YAML null
    bias: none
    target_modules: []
  force_restart: ${..force_restart}
  resume_dataloader: false
  train_batch_size: 4
  valid_batch_size: 4
  weight_decay: 0.01
  learning_rate: 1.0e-06
  gradient_clipping_threshold: 0.3
  lr_scheduler_type: cosine
  num_warmup_steps: 25
  gradient_accumulation_passes: 64
  gradient_checkpointing: true
  reentrant_checkpointing: false
  max_train_steps: 1500
  interrupt_train_steps: -1
  max_eval_steps: -1
  seq_length: 8192
  seq_packing: true
  output_dir: ${..output_dir}/finetune
  seed: ${..seed}
  save_checkpoint_steps: 100
  keep_intermediate_checkpoints: true
  trust_remote_code: false
  cuda_empty_cache: true
  sft_config_name: null
  n_examples: 0
  log_each_n_steps: 1
  also_save_steps: []
  use_safetensors: true
  save_final_training_state: true
  seq_parallel: 1
  objective: rl
  input: training_data
  send_weight_updates: true
  queue_size: 32
  max_lag: null
  weight_update_interval: 1
  pop_old_data: ${..pop_old_data}
  attempts: 8
  eval_callback:
    _target_: pipelinerl.finetune.utils.dummy_eval_callback
    config_name: ''
  rl:
    policy_loss: reinforce
    divide_advantage_by_std: false
    kl_coef: 0.0
    final_kl_coef: 0.0
    entropy_bonus: 0.0
    reward_minus_kl_coef: 0.0
    epsilon_low: 0.02
    epsilon_high: 0.02
    use_advantages: true
    relu_log_p_weights: false
    clamp_log_ratio_ref_new_value: 5
    temperature: ${...llm.parameters.temperature}
    aggregate_loss: sum
    overlong_filtering: false
    adv_estimator: rloo
    filter_zero_advantage_groups: false
# NOTE(review): assumed top-level — confirm it is not nested under finetune
rewards:
  correct_answer_finished: 1.0
  correct_answer_not_finished: 1.0
  wrong_answer_finished: 0
  wrong_answer_not_finished: 0
  no_answer_finished: 0
  no_answer_not_finished: 0
  unparsable_finished: 0
  unparsable_not_finished: 0
streams:
  backend: files
seed: 42
fp32_lm_head: false
fp32_layer_prefix: lm_head
actor:
  log_each_n_secs: 0
  llm_max_rollouts: 256
  rollout_workers: 1
  discount_factor: 1
  problem_queue_size: 256
  result_queue_size: 256
  throughput_window_size: 50
  shared_memory_entry_size: 10000000
  rollout_policy: pipelinerl.domains.math.generate_math_rollout
  system_prompt: 'Please reason step by step, and put your final answer within \boxed{}.'
  task_template: '{task}'
  task_prompt: ''
environment: null
preprocess:
  input: actor
  output: training_data
  n_workers: 8
  chunk_n_groups: 2
  raw_queue_size: 8
  input_queue_size: 32
  output_queue_size: 32
  dataset_buffer_size: 0
  ring_buffer_size: 128
  max_ready_samples_per_lead: 64
  pop_old_data: ${..pop_old_data}
  shared_memory_entry_size: 100000000
  log_every_n_samples: 128
llm:
  parameters:
    max_tokens: 4096
    temperature: 1.0
test_llm:
  parameters:
    max_tokens: 4096
    temperature: 1.0
    top_p: 0.95
    top_k: 50
vllm_config:
  use_v1: false
  quantization: null
  # keys below are passed through as vLLM CLI flags, hence the kebab-case;
  # flags with '' values are boolean switches (present = enabled)
  vllm_kwargs:
    dtype: bfloat16
    gpu-memory-utilization: 0.92
    max-num-seqs: 64
    max-num-batched-tokens: 16384
    enable-chunked-prefill: ''
    return-tokens-as-token-ids: ''
    tensor-parallel-size: 1
    pipeline-parallel-size: 1
    generation-config: vllm
    max_model_len: 8192
    num-scheduler-steps: 8
    disable-log-requests: ''
    disable-frontend-multiprocessing: ''
world:
  replicas: 1
  actor_fraction: 3
  preprocessor_fraction: 0
  finetune_fraction: 1
  env_replicas: 1
  actor_group_port: 9000
  environment_start_port: 7777
  jobs:
    - kind: actor_llm
      idx: 0
      replica_idx: 0
      local_idx: 0
      node_rank: 0
      hostname: localhost
      port: 8080
      gpus:
        - 0
      url: http://localhost:8080
      environment_key: null
      environment_index: null
    - kind: actor_llm
      idx: 1
      replica_idx: 1
      local_idx: 1
      node_rank: 0
      hostname: localhost
      port: 8081
      gpus:
        - 1
      url: http://localhost:8081
      environment_key: null
      environment_index: null
    - kind: actor_llm
      idx: 2
      replica_idx: 2
      local_idx: 2
      node_rank: 0
      hostname: localhost
      port: 8082
      gpus:
        - 2
      url: http://localhost:8082
      environment_key: null
      environment_index: null
    - kind: actor
      idx: 3
      replica_idx: 0
      local_idx: 0
      node_rank: 0
      hostname: localhost
      port: null
      gpus: []
      url: ''
      environment_key: null
      environment_index: null
    - kind: preprocessor
      idx: 4
      replica_idx: 0
      local_idx: 0
      node_rank: 0
      hostname: localhost
      port: null
      gpus: []
      url: ''
      environment_key: null
      environment_index: null
    - kind: environment
      idx: 5
      replica_idx: 0
      local_idx: 0
      node_rank: 0
      hostname: localhost
      port: 7777
      gpus: []
      url: ''
      environment_key: math
      environment_index: 0
    - kind: finetune
      idx: 6
      replica_idx: 0
      local_idx: 0
      node_rank: 0
      hostname: localhost
      port: null
      gpus:
        - 3
      url: ''
      environment_key: null
      environment_index: null
eval_every_n_versions: 78000
model_path: Qwen/Qwen2.5-1.5B
accelerate_config: null
use_deepspeed: true
deepspeed_config: deepspeed_stage3_bf16
use_fsdp: false
fsdp:
  param_dtype: fp32
  reduce_dtype: fp32
  buffer_dtype: fp32
output_dir: results/qwen2.5_1.5b_rloo_no_kl_3a1f_4xh100_236657
force_restart: false
pop_old_data: true
max_lag: null
attempts: 16
train_subset: null
debug:
  mode: ''
  streams_from: null
  place_inference_workers: true
  use_existing_llms: false
me:
  job_idx: null
wandb:
  use_wandb: true
  fail_on_init_error: false
  init_timeout: 120
  wandb_id: null
  wandb_name: null
  wandb_entity_name: jaygala24-team
  wandb_project_name: rl-post-training
  wandb_resume: always
  wandb_use_basename: true
  wandb_workspace_root: results
  wandb_group: qwen2.5_1.5b_rloo_no_kl_3a1f_4xh100_236657
  wandb_dir: null
  tags: []
environments:
  - key: math
    mode: remote
    _target_: pipelinerl.domains.math.MathEnvironment
    environment_key: math
    dataset_loader: pipelinerl.domains.math.load_datasets
    train_dataset_names:
      - gsm8k_train
      - math_train
    test_dataset_names:
      - gsm8k_test
      - math_500