[W CUDAAllocatorConfig.h:28] Warning: expandable_segments not supported on this platform (function operator())
2026-04-10 23:31:29 - INFO - __main__ - Model parameters ModelArguments(base_model_revision=None, model_name_or_path='/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758', model_revision='main', model_code_revision=None, torch_dtype='bfloat16', tokenizer_name_or_path=None, trust_remote_code=False, attn_implementation='flash_attention_2', use_peft=False, lora_r=16, lora_alpha=32, lora_dropout=0.05, lora_target_modules=None, lora_modules_to_save=None, load_in_8bit=False, load_in_4bit=False, bnb_4bit_quant_type='nf4', use_bnb_nested_quant=False, bnb_4bit_quant_storage='uint8')
2026-04-10 23:31:29 - INFO - __main__ - Data parameters DataArguments(chat_template=None, dataset_mixer={'Anthropic/hh-rlhf': 1.0}, text_column='text', dataset_splits=['train', 'test'], dataset_configs=['helpful-base'], dataset_dir=None, preprocessing_num_workers=12, use_persistent_hf_cache=True, hf_cache_dir='/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets', truncation_side=None, auto_insert_empty_system_msg=True, preprocessing_log_samples=0, preprocessing_log_dir=None)
2026-04-10 23:31:29 - INFO - __main__ - Training/evaluation parameters EpsilonDPOConfig(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=False,
batch_eval_metrics=False,
beta=0.01,
bf16=True,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=True,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
dataset_num_proc=12,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_dropout=True,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
epsilon=0.01,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=100,
eval_strategy=IntervalStrategy.STEPS,
eval_use_gather_object=False,
f_alpha_divergence_coef=1.0,
f_divergence_type=FDivergenceType.REVERSE_KL,
force_use_ref_model=False,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
generate_during_eval=False,
gradient_accumulation_steps=1,
gradient_checkpointing=True,
gradient_checkpointing_kwargs={'use_reentrant': False},
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_always_push=False,
hub_model_id=W-61/llama-3-8b-base-epsilon-dpo-hh-helpful,
hub_model_revision=main,
hub_private_repo=None,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_for_metrics=[],
include_inputs_for_metrics=False,
include_num_input_tokens_seen=False,
include_tokens_per_second=False,
is_encoder_decoder=None,
jit_mode_eval=False,
label_names=None,
label_pad_token_id=-100,
label_smoothing=0.0,
label_smoothing_factor=0.0,
learning_rate=5e-07,
length_column_name=length,
load_best_model_at_end=False,
local_rank=0,
log_level=info,
log_level_replica=warning,
log_on_each_node=True,
logging_dir=outputs/llama-3-8b-base-epsilon-dpo-hh-helpful/runs/Apr10_23-31-28_d4054,
logging_first_step=True,
logging_nan_inf_filter=True,
logging_steps=5,
logging_strategy=IntervalStrategy.STEPS,
loss_type=sigmoid,
lr_scheduler_kwargs={},
lr_scheduler_type=SchedulerType.COSINE,
max_grad_norm=1.0,
max_length=512,
max_prompt_length=256,
max_steps=-1,
max_target_length=None,
metric_for_best_model=None,
model_adapter_name=None,
model_init_kwargs=None,
mp_parameters=,
neftune_noise_alpha=None,
no_cuda=False,
non_finite_logits_handling=error,
num_train_epochs=1,
optim=OptimizerNames.ADAMW_TORCH,
optim_args=None,
optim_target_modules=None,
output_dir=/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108,
overwrite_output_dir=False,
padding_value=None,
past_index=-1,
per_device_eval_batch_size=16,
per_device_train_batch_size=16,
post_tokenization_log_dir=None,
post_tokenization_log_samples=0,
precompute_ref_batch_size=None,
precompute_ref_eval_batch_size=None,
precompute_ref_log_probs=False,
prediction_loss_only=False,
push_to_hub=False,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
ref_adapter_name=None,
ref_model_init_kwargs=None,
ref_model_mixup_alpha=0.9,
ref_model_sync_steps=64,
reference_free=False,
remove_unused_columns=False,
report_to=['wandb'],
restore_callback_states_from_checkpoint=False,
resume_from_checkpoint=None,
reuse_tokenized_dataset=True,
rpo_alpha=None,
run_name=llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108,
save_on_each_node=False,
save_only_model=False,
save_safetensors=True,
save_steps=200,
save_strategy=SaveStrategy.STEPS,
save_total_limit=2,
seed=42,
sft_weight=0.0,
skip_memory_metrics=True,
sync_ref_model=False,
tf32=None,
tokenization_batch_size=128,
tokenization_mode=online,
tokenized_dataset_cache_dir=/scratch/feng.yulu/dynamic-dpo-v4/tokenized_preferences,
torch_compile=False,
torch_compile_backend=None,
torch_compile_mode=None,
torch_empty_cache_steps=None,
torchdynamo=None,
tp_size=0,
tpu_metrics_debug=False,
tpu_num_cores=None,
trainer_type=epsilon_dpo,
truncation_mode=keep_end,
use_cpu=False,
use_ipex=False,
use_legacy_prediction_loop=False,
use_liger_kernel=False,
use_mps_device=False,
warmup_ratio=0.1,
warmup_steps=0,
weight_decay=0.0,
)
2026-04-10 23:31:29 - INFO - __main__ - Epsilon-DPO parameters: beta=0.01, epsilon=0.01, gradient_accumulation_steps=1
2026-04-10 23:31:29 - INFO - __main__ - Using persistent HF datasets cache at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets
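[annotation, not a log line] loss_type=sigmoid with beta=0.01 in the config above refers to the standard sigmoid DPO objective; a minimal sketch follows, as a baseline reference only. How the custom epsilon=0.01 term of EpsilonDPOConfig modifies this loss is internal to the project's trainer and is not shown anywhere in this log.

import torch.nn.functional as F

def dpo_sigmoid_loss(policy_chosen_logps, policy_rejected_logps,
                     ref_chosen_logps, ref_rejected_logps, beta=0.01):
    # Implicit rewards: beta-scaled log-ratios of policy vs. frozen reference.
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
    # Standard DPO: -log sigmoid(reward margin), averaged over the batch.
    # (Baseline only; the epsilon=0.01 modification is not reproduced here.)
    return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()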
2026-04-10 23:31:33 - WARNING - __main__ - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.).

Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10849.22 examples/s]
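[annotation, not a log line] The two drop reasons quoted in the warning above describe the normalization contract: chosen and rejected transcripts must share a turn prefix and then diverge in exactly one final assistant response. The normalization script itself is not part of this log, so the sketch below is illustrative (names are hypothetical), under the assumption that raw HH examples are "\n\nHuman: ...\n\nAssistant: ..." transcripts.

import re

def split_turns(transcript):
    # Raw HH transcripts look like "\n\nHuman: ...\n\nAssistant: ...".
    parts = re.split(r"\n\n(Human|Assistant): ", "\n\n" + transcript.strip())
    # re.split keeps the captured role names; parts[0] is an empty prefix.
    return [(parts[i], parts[i + 1].strip()) for i in range(1, len(parts) - 1, 2)]

def normalize_pair(example):
    chosen = split_turns(example["chosen"])
    rejected = split_turns(example["rejected"])
    # The shared turn prefix becomes the prompt.
    n = 0
    while n < min(len(chosen), len(rejected)) and chosen[n] == rejected[n]:
        n += 1
    c_sfx, r_sfx = chosen[n:], rejected[n:]
    # Drop reason 1: each suffix must be exactly one final assistant response.
    # Drop reason 2: identical transcripts leave empty suffixes, i.e. no
    # divergent assistant response. Both cases are counted in the WARNING above.
    if [r for r, _ in c_sfx] != ["Assistant"] or [r for r, _ in r_sfx] != ["Assistant"]:
        return None
    return {"prompt": chosen[:n], "chosen": c_sfx[0][1], "rejected": r_sfx[0][1]}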
2026-04-10 23:31:38 - WARNING - __main__ - Dropped 15 non-canonical HH preference examples from split `test` before normalization (9 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 6 x HH chosen/rejected transcripts must each contain a divergent assistant response.).

Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 11242.26 examples/s]

2026-04-10 23:31:38 - INFO - __main__ - Training on the following splits: ['train : 43598', 'test : 2339']
[INFO|tokenization_utils_base.py:2058] 2026-04-10 23:31:38,617 >> loading file tokenizer.json
[INFO|tokenization_utils_base.py:2058] 2026-04-10 23:31:38,617 >> loading file tokenizer.model
[INFO|tokenization_utils_base.py:2058] 2026-04-10 23:31:38,617 >> loading file added_tokens.json
[INFO|tokenization_utils_base.py:2058] 2026-04-10 23:31:38,617 >> loading file special_tokens_map.json
[INFO|tokenization_utils_base.py:2058] 2026-04-10 23:31:38,617 >> loading file tokenizer_config.json
[INFO|tokenization_utils_base.py:2058] 2026-04-10 23:31:38,617 >> loading file chat_template.jinja
[INFO|tokenization_utils_base.py:2323] 2026-04-10 23:31:39,029 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
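[annotation, not a log line] The split sizes above come from dataset_mixer={'Anthropic/hh-rlhf': 1.0} with dataset_configs=['helpful-base'] in the config. A minimal load equivalent is sketched below; the project's own mixer wrapper is not shown in this log, and the printed sizes are the raw counts before the drops logged above (43598 + 237 train, 2339 + 15 test).

from datasets import load_dataset

raw = load_dataset(
    "Anthropic/hh-rlhf",
    data_dir="helpful-base",  # dataset_configs=['helpful-base']
    cache_dir="/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets",  # hf_cache_dir
)
# Raw split sizes, before non-canonical examples are dropped.
print({split: len(ds) for split, ds in raw.items()})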
2026-04-10 23:31:39 - INFO - __main__ - Processed train sample 41905:

Prompt:
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

What is the best way to clean my refrigerator?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Well, I have a few ideas, but the best way to know what works best is to run some tests and then analyze the data to figure out which technique works best.<|eot_id|><|start_header_id|>user<|end_header_id|>

What are your ideas?<|eot_id|>

Chosen:
<|start_header_id|>assistant<|end_header_id|>

Some of my ideas include wiping down the interior with a rag, using bleach and water to wipe down the door, emptying the contents of the fridge to scrub the shelves with a clean cloth, cleaning the interior of the fridge with vinegar and water, and using a vacuum cleaner to clean the interior.<|eot_id|>

Rejected:
<|start_header_id|>assistant<|end_header_id|>

I was thinking that you could try different types of soap, which could be a variation of “soap,” like antibacterial soap. I was also thinking that you could try using different tools, like a toothbrush, to clean the inside. Or I was thinking that you could try different ways to get the gunk out, like using a squeegee, or using a paper towel. I was also thinking you could try other types of cleaning, like vacuuming, but I think that could have the opposite of the desired effect.<|eot_id|>
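[annotation, not a log line] Prompt/chosen/rejected strings like the sample above are later tokenized under max_prompt_length=256, max_length=512, truncation_mode=keep_end, and label_pad_token_id=-100 from the config. A minimal sketch of that featurization follows; it is illustrative, not the project's tokenized_dpo_trainer.py code.

def build_features(tokenizer, prompt, completion,
                   max_prompt_length=256, max_length=512, label_pad_token_id=-100):
    # The chat-template special tokens are already in the strings, so no extras.
    prompt_ids = tokenizer(prompt, add_special_tokens=False)["input_ids"]
    completion_ids = tokenizer(completion, add_special_tokens=False)["input_ids"]
    # truncation_mode=keep_end: an over-long prompt keeps its most recent tokens.
    prompt_ids = prompt_ids[-max_prompt_length:]
    input_ids = (prompt_ids + completion_ids)[:max_length]
    # Log-probs are computed on completion tokens only; prompt positions are masked.
    labels = ([label_pad_token_id] * len(prompt_ids) + completion_ids)[:max_length]
    return {"input_ids": input_ids, "labels": labels}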
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
  warnings.warn(
[INFO|configuration_utils.py:691] 2026-04-10 23:31:39,350 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/config.json
[INFO|configuration_utils.py:765] 2026-04-10 23:31:39,351 >> Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.0",
  "use_cache": false,
  "vocab_size": 128256
}

[INFO|modeling_utils.py:1121] 2026-04-10 23:31:39,359 >> loading weights file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/model.safetensors.index.json
[INFO|modeling_utils.py:2167] 2026-04-10 23:31:39,360 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
[WARNING|logging.py:328] 2026-04-10 23:31:39,362 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
[INFO|configuration_utils.py:1142] 2026-04-10 23:31:39,363 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "use_cache": false
}
[WARNING|trainer.py:821] 2026-04-10 23:31:39,825 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.

Loading checkpoint shards: 100%|██████████| 7/7 [00:08<00:00, 1.16s/it]
[INFO|modeling_utils.py:4926] 2026-04-10 23:31:47,490 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
[INFO|modeling_utils.py:4934] 2026-04-10 23:31:47,490 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
[INFO|configuration_utils.py:1095] 2026-04-10 23:31:47,492 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/generation_config.json
[INFO|configuration_utils.py:1142] 2026-04-10 23:31:47,492 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": 128001,
  "max_length": 4096,
  "temperature": 0.6,
  "top_p": 0.9
}

[INFO|configuration_utils.py:691] 2026-04-10 23:31:47,493 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/config.json
[INFO|configuration_utils.py:765] 2026-04-10 23:31:47,494 >> Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.0",
  "use_cache": false,
  "vocab_size": 128256
}

[INFO|modeling_utils.py:1121] 2026-04-10 23:31:47,495 >> loading weights file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/model.safetensors.index.json
[INFO|modeling_utils.py:2167] 2026-04-10 23:31:47,495 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
[INFO|configuration_utils.py:1142] 2026-04-10 23:31:47,498 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "use_cache": false
}
Loading checkpoint shards: 100%|██████████| 7/7 [00:08<00:00, 1.15s/it]
[INFO|modeling_utils.py:4926] 2026-04-10 23:31:55,706 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
[INFO|modeling_utils.py:4934] 2026-04-10 23:31:55,706 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
[INFO|configuration_utils.py:1095] 2026-04-10 23:31:55,708 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/generation_config.json
[INFO|configuration_utils.py:1142] 2026-04-10 23:31:55,708 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": 128001,
  "max_length": 4096,
  "temperature": 0.6,
  "top_p": 0.9
}

[WARNING|trainer.py:821] 2026-04-10 23:31:55,709 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
[WARNING|trainer.py:816] 2026-04-10 23:31:55,710 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
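[annotation, not a log line] The deprecation warnings above are triggered by reads of Trainer.tokenizer inside the training stack; on recent transformers the supported spelling is the processing_class attribute. A minimal sketch, reusing the tokenizer path from this run:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained(
    "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758"
)
# Supported usage: pass processing_class=tokenizer when constructing the trainer
# and read trainer.processing_class, instead of the deprecated trainer.tokenizer.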
|
||
Tokenizing train (num_proc=12): 0%| | 0/43598 [00:00<?, ? examples/s]
Tokenizing train (num_proc=12): 50%|█████ | 21800/43598 [02:51<00:19, 1113.92 examples/s]
Tokenizing train (num_proc=12): 100%|█████████▉| 43549/43598 [05:08<00:00, 1253.56 examples/s]
Traceback (most recent call last):
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
    self.run()
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
    server.serve_forever()
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
    sys.exit(0)
SystemExit: 0

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
    finalizer()
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
    _rmtree_safe_fd(fd, path, onerror)
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
    onerror(os.unlink, fullname, sys.exc_info())
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '.nfsbcd1ded41614a8ea00001e63'
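
The OSError itself is an NFS artifact rather than a tokenization bug: a file deleted while another process still holds it open is silently renamed to a `.nfs*` placeholder, so the multiprocess temp-dir cleanup hits EBUSY. The run continues regardless. If the noise matters, one hedged workaround (not part of this codebase) is a retrying rmtree:

import errno
import shutil
import time

def rmtree_nfs_safe(path: str, retries: int = 5, delay: float = 1.0) -> None:
    """Best-effort rmtree that retries on EBUSY, giving NFS time to
    release .nfs* silly-rename files once their open handles close."""
    for attempt in range(retries):
        try:
            shutil.rmtree(path)
            return
        except OSError as exc:
            if exc.errno != errno.EBUSY or attempt == retries - 1:
                raise
            time.sleep(delay)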
Tokenizing train (num_proc=12): 100%|██████████| 43598/43598 [05:08<00:00, 141.15 examples/s]
[WARNING|trainer.py:816] 2026-04-10 23:37:47,834 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Saving the dataset (0/2 shards): 0%| | 0/43598 [00:00<?, ? examples/s]
Saving the dataset (2/2 shards): 100%|██████████| 43598/43598 [00:01<00:00, 32295.33 examples/s]
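
The "(0/2 shards)" counter corresponds to datasets' sharded on-disk cache format; a minimal runnable sketch of the kind of save that produces this progress bar (the path and stand-in data are hypothetical):

from datasets import Dataset, load_from_disk

# Hypothetical illustration only: write a Dataset to disk in two arrow
# shards, matching the "(0/2 shards)" counter, then reload it later.
ds = Dataset.from_dict({"input_ids": [[1, 2], [3, 4]]})  # stand-in data
ds.save_to_disk("/tmp/tokenized_train_demo", num_shards=2)
reloaded = load_from_disk("/tmp/tokenized_train_demo")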
[WARNING|trainer.py:816] 2026-04-10 23:37:50,336 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Tokenizing test (num_proc=12): 0%| | 0/2339 [00:00<?, ? examples/s]
Tokenizing test (num_proc=12): 97%|█████████▋| 2273/2339 [05:11<00:08, 7.60 examples/s]
Traceback (most recent call last):
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
    self.run()
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
    server.serve_forever()
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
    sys.exit(0)
SystemExit: 0

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
    finalizer()
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
    _rmtree_safe_fd(fd, path, onerror)
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
    onerror(os.unlink, fullname, sys.exc_info())
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '.nfs934857c9318be11600001e64'
Tokenizing test (num_proc=12): 100%|██████████| 2339/2339 [05:11<00:00, 7.51 examples/s]
[WARNING|trainer.py:816] 2026-04-10 23:43:39,489 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
Saving the dataset (0/1 shards): 0%| | 0/2339 [00:00<?, ? examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 2339/2339 [00:00<00:00, 31622.22 examples/s]
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:518: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `EpsilonDPOTrainer.__init__`. Use `processing_class` instead.
super().__init__(
[WARNING|trainer.py:816] 2026-04-10 23:43:42,305 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
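
The FutureWarning above points at tokenized_dpo_trainer.py:518, where the subclass forwards a `tokenizer` argument to its parent. A hedged sketch of the rename that silences it, assuming a TRL-style parent that already accepts the new name (the class internals here are guessed, not the repo's actual code):

from trl import DPOTrainer

# Hedged sketch only: forward the argument under its new name so the
# deprecated `tokenizer` kwarg never reaches the parent __init__.
class EpsilonDPOTrainer(DPOTrainer):  # assumed TRL DPOTrainer base
    def __init__(self, *args, processing_class=None, **kwargs):
        super().__init__(
            *args,
            processing_class=processing_class,  # was: tokenizer=tokenizer
            **kwargs,
        )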
[INFO|trainer.py:748] 2026-04-10 23:43:42,585 >> Using auto half precision backend
/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaForCausalLM because mixed precision turned on in FSDP. Affects: model.embed_tokens.weight, model.norm.weight, lm_head.weight.
  warnings.warn(
/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaDecoderLayer because mixed precision turned on in FSDP. Affects: self_attn.q_proj.weight, self_attn.k_proj.weight, self_attn.v_proj.weight, self_attn.o_proj.weight, mlp.gate_proj.weight, mlp.up_proj.weight, mlp.down_proj.weight, input_layernorm.weight, post_attention_layernorm.weight.
  warnings.warn(
/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1563: UserWarning: FSDP upcast of low precision parameters may affect the precision of model checkpoints.
  warnings.warn(
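
These accelerate warnings are expected when the model is loaded in bf16 while FSDP mixed precision is active: FSDP keeps fp32 master copies of the listed parameters and casts to bf16 for compute. In raw PyTorch FSDP terms the policy looks roughly like this (a sketch of the concept, not this run's accelerate config):

import torch
from torch.distributed.fsdp import MixedPrecision

# Conceptual sketch of a bf16 mixed-precision policy: parameters live as
# fp32 masters (hence the "Upcasted" warnings) and are cast to bf16 for
# forward/backward compute and gradient communication.
bf16_policy = MixedPrecision(
    param_dtype=torch.bfloat16,
    reduce_dtype=torch.bfloat16,
    buffer_dtype=torch.bfloat16,
)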
[INFO|trainer.py:2414] 2026-04-10 23:43:47,713 >> ***** Running training *****
[INFO|trainer.py:2415] 2026-04-10 23:43:47,713 >> Num examples = 43,598
[INFO|trainer.py:2416] 2026-04-10 23:43:47,713 >> Num Epochs = 1
[INFO|trainer.py:2417] 2026-04-10 23:43:47,713 >> Instantaneous batch size per device = 16
[INFO|trainer.py:2420] 2026-04-10 23:43:47,713 >> Total train batch size (w. parallel, distributed & accumulation) = 128
[INFO|trainer.py:2421] 2026-04-10 23:43:47,713 >> Gradient Accumulation steps = 1
[INFO|trainer.py:2422] 2026-04-10 23:43:47,713 >> Total optimization steps = 340
[INFO|trainer.py:2423] 2026-04-10 23:43:47,714 >> Number of trainable parameters = 1,003,782,656
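
The step count is consistent with the sizes above; a quick check (assumes 8 devices, per the 8xh200 run name, and that the trailing partial batch is dropped):

# 16 per-device batch x 8 GPUs x 1 grad-accum step = 128 total batch;
# one epoch over 43,598 examples with the partial final batch dropped.
num_examples = 43_598
total_batch = 16 * 8 * 1
assert total_batch == 128
assert num_examples // total_batch == 340  # "Total optimization steps"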
[INFO|integration_utils.py:831] 2026-04-10 23:43:47,714 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
wandb: Currently logged in as: can-not-fand (can-not-fand-northeastern-university). Use `wandb login --relogin` to force relogin
wandb: wandb version 0.25.1 is available! To upgrade, please run:
wandb: $ pip install wandb --upgrade
wandb: Tracking run with wandb version 0.17.5
wandb: Run data is saved locally in /scratch/feng.yulu/dynamic-dpo-v4/wandb/wandb/run-20260410_234350-4j5nnm1b
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108
wandb: ⭐️ View project at https://wandb.ai/can-not-fand-northeastern-university/huggingface
wandb: 🚀 View run at https://wandb.ai/can-not-fand-northeastern-university/huggingface/runs/4j5nnm1b
0%| | 0/340 [00:00<?, ?it/s]
[WARNING|modeling_utils.py:1713] 2026-04-10 23:43:56,949 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed
0%| | 1/340 [00:03<19:38, 3.48s/it]
{'loss': 0.6932, 'grad_norm': 2.3687267303466797, 'learning_rate': 0.0, 'rewards/chosen': 9.683193638920784e-06, 'rewards/rejected': 0.00013133684115018696, 'rewards/accuracies': 0.515625, 'rewards/margins': -0.0001216536620631814, 'logps/chosen': -69.28079223632812, 'logps/rejected': -69.7318344116211, 'logps/ref_chosen': -69.2831802368164, 'logps/ref_rejected': -69.74366760253906, 'logits/chosen': -0.5232092142105103, 'logits/rejected': -0.36964714527130127, 'kl/p_epsilon_steps': 0.5, 'kl/n_epsilon_steps': 0.5, 'kl/beta': 0.009999999776482582, 'kl/avg_steps': 0.0, 'epoch': 0.0}
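
One way to read these metrics: in standard DPO logging, rewards/chosen and rewards/rejected are the implicit rewards beta * (logps - logps_ref), and rewards/margins is simply their difference, which the logged step-1 values reproduce:

# rewards/margins should equal rewards/chosen - rewards/rejected; checking
# against the step-1 values logged above (standard DPO metric convention).
chosen = 9.683193638920784e-06
rejected = 0.00013133684115018696
logged_margin = -0.0001216536620631814
assert abs((chosen - rejected) - logged_margin) < 1e-9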
1%| | 2/340 [00:06<18:00, 3.20s/it]
1%| | 3/340 [00:09<16:52, 3.00s/it]
1%| | 4/340 [00:11<15:42, 2.80s/it]
1%|▏ | 5/340 [00:14<15:34, 2.79s/it]
{'loss': 0.6932, 'grad_norm': 2.401517868041992, 'learning_rate': 5.88235294117647e-08, 'rewards/chosen': -0.00011636512499535456, 'rewards/rejected': -3.8100268284324557e-05, 'rewards/accuracies': 0.505859375, 'rewards/margins': -7.826486398698762e-05, 'logps/chosen': -75.71084594726562, 'logps/rejected': -81.47822570800781, 'logps/ref_chosen': -75.70054626464844, 'logps/ref_rejected': -81.47293090820312, 'logits/chosen': -0.5336302518844604, 'logits/rejected': -0.41014784574508667, 'kl/p_epsilon_steps': 0.5, 'kl/n_epsilon_steps': 0.498046875, 'kl/beta': 0.009997854940593243, 'kl/avg_steps': 0.001953125, 'epoch': 0.01}
2%|▏ | 6/340 [00:17<15:28, 2.78s/it]
2%|▏ | 7/340 [00:20<15:21, 2.77s/it]
2%|▏ | 8/340 [00:22<15:02, 2.72s/it]
3%|▎ | 9/340 [00:25<15:02, 2.73s/it]
3%|▎ | 10/340 [00:28<15:01, 2.73s/it]
{'loss': 0.6932, 'grad_norm': 2.312957525253296, 'learning_rate': 1.3235294117647057e-07, 'rewards/chosen': -7.36937508918345e-05, 'rewards/rejected': -6.373519863700494e-05, 'rewards/accuracies': 0.4765625, 'rewards/margins': -9.958527698472608e-06, 'logps/chosen': -77.008544921875, 'logps/rejected': -82.64922332763672, 'logps/ref_chosen': -77.0025405883789, 'logps/ref_rejected': -82.64138793945312, 'logits/chosen': -0.5401719808578491, 'logits/rejected': -0.4321846067905426, 'kl/p_epsilon_steps': 0.4703125059604645, 'kl/n_epsilon_steps': 0.5234375, 'kl/beta': 0.010005339980125427, 'kl/avg_steps': -0.05312500149011612, 'epoch': 0.03}
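
The kl/* fields are this repo's custom metrics rather than standard TRL ones, but empirically across these lines kl/avg_steps equals kl/p_epsilon_steps minus kl/n_epsilon_steps; a check with the step-10 values just above (an observation about the log, not a claim about the algorithm's internals):

# Empirical identity among the custom kl/* metrics, step-10 values:
p_steps = 0.4703125059604645
n_steps = 0.5234375
avg_steps = -0.05312500149011612
assert abs((p_steps - n_steps) - avg_steps) < 1e-7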
3%|▎ | 11/340 [00:30<15:04, 2.75s/it]
4%|▎ | 12/340 [00:33<15:01, 2.75s/it]
4%|▍ | 13/340 [00:36<15:00, 2.75s/it]
4%|▍ | 14/340 [00:39<14:51, 2.74s/it]
4%|▍ | 15/340 [00:41<14:45, 2.73s/it]
{'loss': 0.6928, 'grad_norm': 2.890460968017578, 'learning_rate': 2.0588235294117645e-07, 'rewards/chosen': 8.707816596142948e-05, 'rewards/rejected': -0.0005263587227091193, 'rewards/accuracies': 0.5718749761581421, 'rewards/margins': 0.0006134368595667183, 'logps/chosen': -70.82783508300781, 'logps/rejected': -87.48735809326172, 'logps/ref_chosen': -70.83788299560547, 'logps/ref_rejected': -87.43305206298828, 'logits/chosen': -0.5125764608383179, 'logits/rejected': -0.4432317316532135, 'kl/p_epsilon_steps': 0.5640624761581421, 'kl/n_epsilon_steps': 0.43437498807907104, 'kl/beta': 0.010008977726101875, 'kl/avg_steps': 0.12968750298023224, 'epoch': 0.04}
5%|▍ | 16/340 [00:44<14:50, 2.75s/it]
5%|▌ | 17/340 [00:47<14:38, 2.72s/it]
5%|▌ | 18/340 [00:49<14:32, 2.71s/it]
6%|▌ | 19/340 [00:52<14:23, 2.69s/it]
6%|▌ | 20/340 [00:55<14:22, 2.70s/it]
{'loss': 0.6922, 'grad_norm': 2.124864101409912, 'learning_rate': 2.7941176470588235e-07, 'rewards/chosen': 0.00024056310940068215, 'rewards/rejected': -0.0016350041842088103, 'rewards/accuracies': 0.6499999761581421, 'rewards/margins': 0.0018755672499537468, 'logps/chosen': -70.1437759399414, 'logps/rejected': -82.44139099121094, 'logps/ref_chosen': -70.1697006225586, 'logps/ref_rejected': -82.27420806884766, 'logits/chosen': -0.5464522242546082, 'logits/rejected': -0.4405369162559509, 'kl/p_epsilon_steps': 0.6312500238418579, 'kl/n_epsilon_steps': 0.3656249940395355, 'kl/beta': 0.009920386597514153, 'kl/avg_steps': 0.265625, 'epoch': 0.06}
6%|▌ | 21/340 [00:58<14:33, 2.74s/it]
6%|▋ | 22/340 [01:00<14:33, 2.75s/it]
7%|▋ | 23/340 [01:03<14:30, 2.75s/it]
7%|▋ | 24/340 [01:06<14:40, 2.79s/it]
7%|▋ | 25/340 [01:09<14:29, 2.76s/it]
{'loss': 0.6904, 'grad_norm': 2.521970510482788, 'learning_rate': 3.529411764705882e-07, 'rewards/chosen': 0.0008119211415760219, 'rewards/rejected': -0.00473719323053956, 'rewards/accuracies': 0.7796875238418579, 'rewards/margins': 0.005549114663153887, 'logps/chosen': -74.4179458618164, 'logps/rejected': -90.02223205566406, 'logps/ref_chosen': -74.5040283203125, 'logps/ref_rejected': -89.5297622680664, 'logits/chosen': -0.568504273891449, 'logits/rejected': -0.4329379200935364, 'kl/p_epsilon_steps': 0.7718750238418579, 'kl/n_epsilon_steps': 0.22812500596046448, 'kl/beta': 0.009726567193865776, 'kl/avg_steps': 0.543749988079071, 'epoch': 0.07}
8%|▊ | 26/340 [01:11<14:18, 2.73s/it]
8%|▊ | 27/340 [01:14<13:58, 2.68s/it]
8%|▊ | 28/340 [01:17<13:57, 2.68s/it]
9%|▊ | 29/340 [01:19<14:00, 2.70s/it]
9%|▉ | 30/340 [01:22<14:02, 2.72s/it]
{'loss': 0.6867, 'grad_norm': 2.3963418006896973, 'learning_rate': 4.264705882352941e-07, 'rewards/chosen': 0.0004493276646826416, 'rewards/rejected': -0.012645403854548931, 'rewards/accuracies': 0.8125, 'rewards/margins': 0.01309473067522049, 'logps/chosen': -76.55107879638672, 'logps/rejected': -83.71476745605469, 'logps/ref_chosen': -76.60227966308594, 'logps/ref_rejected': -82.36322784423828, 'logits/chosen': -0.6653466820716858, 'logits/rejected': -0.49282917380332947, 'kl/p_epsilon_steps': 0.7828124761581421, 'kl/n_epsilon_steps': 0.21562500298023224, 'kl/beta': 0.00945484172552824, 'kl/avg_steps': 0.567187488079071, 'epoch': 0.09}
9%|▉ | 31/340 [01:25<14:00, 2.72s/it]
9%|▉ | 32/340 [01:28<14:05, 2.74s/it]
10%|▉ | 33/340 [01:30<14:00, 2.74s/it]
10%|█ | 34/340 [01:33<13:44, 2.70s/it]
10%|█ | 35/340 [01:36<13:46, 2.71s/it]
{'loss': 0.6835, 'grad_norm': 2.311098337173462, 'learning_rate': 5e-07, 'rewards/chosen': -0.003281622426584363, 'rewards/rejected': -0.02297242358326912, 'rewards/accuracies': 0.809374988079071, 'rewards/margins': 0.019690800458192825, 'logps/chosen': -76.14710998535156, 'logps/rejected': -86.21320343017578, 'logps/ref_chosen': -75.79379272460938, 'logps/ref_rejected': -83.69039154052734, 'logits/chosen': -0.6610927581787109, 'logits/rejected': -0.5268033146858215, 'kl/p_epsilon_steps': 0.776562511920929, 'kl/n_epsilon_steps': 0.22187499701976776, 'kl/beta': 0.009198471903800964, 'kl/avg_steps': 0.5546875, 'epoch': 0.1}
11%|█ | 36/340 [01:38<13:47, 2.72s/it]
11%|█ | 37/340 [01:41<13:42, 2.71s/it]
11%|█ | 38/340 [01:44<13:38, 2.71s/it]
11%|█▏ | 39/340 [01:47<13:33, 2.70s/it]
12%|█▏ | 40/340 [01:49<13:39, 2.73s/it]
{'loss': 0.6732, 'grad_norm': 2.681466817855835, 'learning_rate': 4.996706849759452e-07, 'rewards/chosen': -0.021187324076890945, 'rewards/rejected': -0.06287702172994614, 'rewards/accuracies': 0.78125, 'rewards/margins': 0.04168969392776489, 'logps/chosen': -77.57659149169922, 'logps/rejected': -93.75047302246094, 'logps/ref_chosen': -75.21812438964844, 'logps/ref_rejected': -86.6792984008789, 'logits/chosen': -0.8570469617843628, 'logits/rejected': -0.7218376398086548, 'kl/p_epsilon_steps': 0.703125, 'kl/n_epsilon_steps': 0.2953124940395355, 'kl/beta': 0.008961381390690804, 'kl/avg_steps': 0.4078125059604645, 'epoch': 0.12}
12%|█▏ | 41/340 [01:52<13:41, 2.75s/it]
12%|█▏ | 42/340 [01:55<13:36, 2.74s/it]
13%|█▎ | 43/340 [01:58<13:30, 2.73s/it]
13%|█▎ | 44/340 [02:00<13:21, 2.71s/it]
13%|█▎ | 45/340 [02:03<13:13, 2.69s/it]
{'loss': 0.6715, 'grad_norm': 3.2158050537109375, 'learning_rate': 4.986836074908615e-07, 'rewards/chosen': -0.049075882881879807, 'rewards/rejected': -0.09539631009101868, 'rewards/accuracies': 0.7515624761581421, 'rewards/margins': 0.04632042720913887, 'logps/chosen': -82.83192443847656, 'logps/rejected': -102.5731201171875, 'logps/ref_chosen': -77.2712173461914, 'logps/ref_rejected': -91.67030334472656, 'logits/chosen': -0.9623914957046509, 'logits/rejected': -0.8303581476211548, 'kl/p_epsilon_steps': 0.659375011920929, 'kl/n_epsilon_steps': 0.34062498807907104, 'kl/beta': 0.008803511038422585, 'kl/avg_steps': 0.3187499940395355, 'epoch': 0.13}
14%|█▎ | 46/340 [02:06<13:21, 2.73s/it]
14%|█▍ | 47/340 [02:08<13:23, 2.74s/it]
14%|█▍ | 48/340 [02:11<13:20, 2.74s/it]
14%|█▍ | 49/340 [02:14<13:12, 2.72s/it]
15%|█▍ | 50/340 [02:17<13:11, 2.73s/it]
{'loss': 0.6712, 'grad_norm': 3.3687705993652344, 'learning_rate': 4.970413680203148e-07, 'rewards/chosen': -0.07562652230262756, 'rewards/rejected': -0.12378251552581787, 'rewards/accuracies': 0.698437511920929, 'rewards/margins': 0.048155996948480606, 'logps/chosen': -82.58769226074219, 'logps/rejected': -94.2342758178711, 'logps/ref_chosen': -73.91633605957031, 'logps/ref_rejected': -79.92402648925781, 'logits/chosen': -1.0613696575164795, 'logits/rejected': -0.921216607093811, 'kl/p_epsilon_steps': 0.6031249761581421, 'kl/n_epsilon_steps': 0.3968749940395355, 'kl/beta': 0.008690183982253075, 'kl/avg_steps': 0.20624999701976776, 'epoch': 0.15}
15%|█▌ | 51/340 [02:19<13:09, 2.73s/it]
15%|█▌ | 52/340 [02:22<12:51, 2.68s/it]
16%|█▌ | 53/340 [02:25<12:48, 2.68s/it]
16%|█▌ | 54/340 [02:27<12:54, 2.71s/it]
16%|█▌ | 55/340 [02:30<12:51, 2.71s/it]
{'loss': 0.6639, 'grad_norm': 4.448400020599365, 'learning_rate': 4.947482930773511e-07, 'rewards/chosen': -0.10501708835363388, 'rewards/rejected': -0.17072856426239014, 'rewards/accuracies': 0.6859375238418579, 'rewards/margins': 0.06571148335933685, 'logps/chosen': -91.91180419921875, 'logps/rejected': -103.12516021728516, 'logps/ref_chosen': -79.74378204345703, 'logps/ref_rejected': -83.18132019042969, 'logits/chosen': -1.1757243871688843, 'logits/rejected': -1.0121644735336304, 'kl/p_epsilon_steps': 0.604687511920929, 'kl/n_epsilon_steps': 0.39375001192092896, 'kl/beta': 0.00860314816236496, 'kl/avg_steps': 0.2109375, 'epoch': 0.16}
16%|█▋ | 56/340 [02:33<12:54, 2.73s/it]
17%|█▋ | 57/340 [02:36<12:57, 2.75s/it]
17%|█▋ | 58/340 [02:38<12:53, 2.74s/it]
17%|█▋ | 59/340 [02:41<12:47, 2.73s/it]
18%|█▊ | 60/340 [02:44<12:50, 2.75s/it]
{'loss': 0.6663, 'grad_norm': 3.918736219406128, 'learning_rate': 4.918104238142103e-07, 'rewards/chosen': -0.14256855845451355, 'rewards/rejected': -0.20627903938293457, 'rewards/accuracies': 0.660937488079071, 'rewards/margins': 0.06371048837900162, 'logps/chosen': -98.29524993896484, 'logps/rejected': -105.27479553222656, 'logps/ref_chosen': -81.61141967773438, 'logps/ref_rejected': -80.947998046875, 'logits/chosen': -1.2249476909637451, 'logits/rejected': -1.1036134958267212, 'kl/p_epsilon_steps': 0.5859375, 'kl/n_epsilon_steps': 0.4140625, 'kl/beta': 0.008520014584064484, 'kl/avg_steps': 0.171875, 'epoch': 0.18}
18%|█▊ | 61/340 [02:46<12:32, 2.70s/it]
18%|█▊ | 62/340 [02:49<12:36, 2.72s/it]
19%|█▊ | 63/340 [02:52<12:35, 2.73s/it]
19%|█▉ | 64/340 [02:55<12:34, 2.73s/it]
19%|█▉ | 65/340 [02:57<12:24, 2.71s/it]
{'loss': 0.6479, 'grad_norm': 3.5865535736083984, 'learning_rate': 4.882355001067891e-07, 'rewards/chosen': -0.14002391695976257, 'rewards/rejected': -0.24343439936637878, 'rewards/accuracies': 0.6875, 'rewards/margins': 0.10341048240661621, 'logps/chosen': -91.71420288085938, 'logps/rejected': -117.06733703613281, 'logps/ref_chosen': -75.09439849853516, 'logps/ref_rejected': -87.96830749511719, 'logits/chosen': -1.2322965860366821, 'logits/rejected': -1.151759147644043, 'kl/p_epsilon_steps': 0.637499988079071, 'kl/n_epsilon_steps': 0.3609375059604645, 'kl/beta': 0.00841777864843607, 'kl/avg_steps': 0.27656251192092896, 'epoch': 0.19}
19%|█▉ | 66/340 [03:00<12:12, 2.67s/it]
20%|█▉ | 67/340 [03:03<12:09, 2.67s/it]
20%|██ | 68/340 [03:05<12:08, 2.68s/it]
20%|██ | 69/340 [03:08<11:56, 2.64s/it]
21%|██ | 70/340 [03:11<11:54, 2.65s/it]
{'loss': 0.6462, 'grad_norm': 3.871297836303711, 'learning_rate': 4.840329401637809e-07, 'rewards/chosen': -0.16316808760166168, 'rewards/rejected': -0.27303391695022583, 'rewards/accuracies': 0.7265625, 'rewards/margins': 0.10986582934856415, 'logps/chosen': -89.69293975830078, 'logps/rejected': -122.0387954711914, 'logps/ref_chosen': -70.07804870605469, 'logps/ref_rejected': -88.98612976074219, 'logits/chosen': -1.2796670198440552, 'logits/rejected': -1.1985622644424438, 'kl/p_epsilon_steps': 0.6421874761581421, 'kl/n_epsilon_steps': 0.3578124940395355, 'kl/beta': 0.008305966854095459, 'kl/avg_steps': 0.28437501192092896, 'epoch': 0.21}
21%|██ | 71/340 [03:13<11:59, 2.67s/it]
21%|██ | 72/340 [03:16<12:04, 2.70s/it]
21%|██▏ | 73/340 [03:19<12:02, 2.71s/it]
22%|██▏ | 74/340 [03:22<12:01, 2.71s/it]
22%|██▏ | 75/340 [03:24<12:01, 2.72s/it]
{'loss': 0.6538, 'grad_norm': 3.9685800075531006, 'learning_rate': 4.792138157142157e-07, 'rewards/chosen': -0.191951185464859, 'rewards/rejected': -0.2879168391227722, 'rewards/accuracies': 0.6796875, 'rewards/margins': 0.09596569836139679, 'logps/chosen': -101.08387756347656, 'logps/rejected': -117.42021179199219, 'logps/ref_chosen': -77.74958801269531, 'logps/ref_rejected': -82.17206573486328, 'logits/chosen': -1.2629064321517944, 'logits/rejected': -1.1684788465499878, 'kl/p_epsilon_steps': 0.59375, 'kl/n_epsilon_steps': 0.40625, 'kl/beta': 0.008209030143916607, 'kl/avg_steps': 0.1875, 'epoch': 0.22}
22%|██▏ | 76/340 [03:27<12:00, 2.73s/it]
23%|██▎ | 77/340 [03:30<12:00, 2.74s/it]
23%|██▎ | 78/340 [03:32<11:55, 2.73s/it]
23%|██▎ | 79/340 [03:35<11:51, 2.72s/it]
24%|██▎ | 80/340 [03:38<11:44, 2.71s/it]
{'loss': 0.6438, 'grad_norm': 4.582348823547363, 'learning_rate': 4.737908228387656e-07, 'rewards/chosen': -0.20838662981987, 'rewards/rejected': -0.32801881432533264, 'rewards/accuracies': 0.6875, 'rewards/margins': 0.11963216215372086, 'logps/chosen': -107.53079986572266, 'logps/rejected': -131.16079711914062, 'logps/ref_chosen': -81.88478088378906, 'logps/ref_rejected': -90.519775390625, 'logits/chosen': -1.2720203399658203, 'logits/rejected': -1.218477725982666, 'kl/p_epsilon_steps': 0.621874988079071, 'kl/n_epsilon_steps': 0.37812501192092896, 'kl/beta': 0.008118118159472942, 'kl/avg_steps': 0.24375000596046448, 'epoch': 0.24}
24%|██▍ | 81/340 [03:41<11:46, 2.73s/it]
24%|██▍ | 82/340 [03:43<11:32, 2.68s/it]
24%|██▍ | 83/340 [03:46<11:18, 2.64s/it]
25%|██▍ | 84/340 [03:48<11:22, 2.67s/it]
25%|██▌ | 85/340 [03:51<11:30, 2.71s/it]
{'loss': 0.6418, 'grad_norm': 3.6524829864501953, 'learning_rate': 4.6777824852166437e-07, 'rewards/chosen': -0.20263484120368958, 'rewards/rejected': -0.32719942927360535, 'rewards/accuracies': 0.684374988079071, 'rewards/margins': 0.12456460297107697, 'logps/chosen': -95.5977554321289, 'logps/rejected': -118.98405456542969, 'logps/ref_chosen': -70.41683197021484, 'logps/ref_rejected': -78.02936553955078, 'logits/chosen': -1.2834303379058838, 'logits/rejected': -1.198880672454834, 'kl/p_epsilon_steps': 0.6234375238418579, 'kl/n_epsilon_steps': 0.37187498807907104, 'kl/beta': 0.0080325398594141, 'kl/avg_steps': 0.2515625059604645, 'epoch': 0.25}
25%|██▌ | 86/340 [03:54<11:28, 2.71s/it]
26%|██▌ | 87/340 [03:57<11:20, 2.69s/it]
26%|██▌ | 88/340 [03:59<11:15, 2.68s/it]
26%|██▌ | 89/340 [04:02<11:16, 2.70s/it]
26%|██▋ | 90/340 [04:05<11:15, 2.70s/it]
{'loss': 0.6361, 'grad_norm': 4.22735071182251, 'learning_rate': 4.611919330113591e-07, 'rewards/chosen': -0.23172405362129211, 'rewards/rejected': -0.37008827924728394, 'rewards/accuracies': 0.706250011920929, 'rewards/margins': 0.13836422562599182, 'logps/chosen': -105.8456039428711, 'logps/rejected': -136.4986572265625, 'logps/ref_chosen': -76.6160888671875, 'logps/ref_rejected': -89.49937438964844, 'logits/chosen': -1.2632228136062622, 'logits/rejected': -1.2163931131362915, 'kl/p_epsilon_steps': 0.6421874761581421, 'kl/n_epsilon_steps': 0.35468751192092896, 'kl/beta': 0.007919726893305779, 'kl/avg_steps': 0.2874999940395355, 'epoch': 0.26}
27%|██▋ | 91/340 [04:07<11:14, 2.71s/it]
27%|██▋ | 92/340 [04:10<11:22, 2.75s/it]
27%|██▋ | 93/340 [04:13<11:17, 2.74s/it]
28%|██▊ | 94/340 [04:16<11:09, 2.72s/it]
28%|██▊ | 95/340 [04:18<10:57, 2.68s/it]
{'loss': 0.6411, 'grad_norm': 4.236695766448975, 'learning_rate': 4.5404922808905543e-07, 'rewards/chosen': -0.24040882289409637, 'rewards/rejected': -0.36987045407295227, 'rewards/accuracies': 0.7015625238418579, 'rewards/margins': 0.1294616460800171, 'logps/chosen': -104.29510498046875, 'logps/rejected': -124.16410827636719, 'logps/ref_chosen': -73.50260162353516, 'logps/ref_rejected': -76.48811340332031, 'logits/chosen': -1.2625572681427002, 'logits/rejected': -1.2011988162994385, 'kl/p_epsilon_steps': 0.637499988079071, 'kl/n_epsilon_steps': 0.36250001192092896, 'kl/beta': 0.0078009068965911865, 'kl/avg_steps': 0.2750000059604645, 'epoch': 0.28}
28%|██▊ | 96/340 [04:21<11:00, 2.71s/it]
29%|██▊ | 97/340 [04:24<10:48, 2.67s/it]
29%|██▉ | 98/340 [04:26<10:52, 2.70s/it]
29%|██▉ | 99/340 [04:29<11:10, 2.78s/it]
29%|██▉ | 100/340 [04:32<11:05, 2.77s/it]
{'loss': 0.6236, 'grad_norm': 4.193100452423096, 'learning_rate': 4.4636895135509966e-07, 'rewards/chosen': -0.24038386344909668, 'rewards/rejected': -0.4081670641899109, 'rewards/accuracies': 0.746874988079071, 'rewards/margins': 0.16778317093849182, 'logps/chosen': -103.88249206542969, 'logps/rejected': -134.57403564453125, 'logps/ref_chosen': -72.6116714477539, 'logps/ref_rejected': -81.16241455078125, 'logits/chosen': -1.2317556142807007, 'logits/rejected': -1.1946831941604614, 'kl/p_epsilon_steps': 0.682812511920929, 'kl/n_epsilon_steps': 0.31718748807907104, 'kl/beta': 0.0076876478269696236, 'kl/avg_steps': 0.3656249940395355, 'epoch': 0.29}
[INFO|trainer.py:4307] 2026-04-10 23:48:26,201 >>
***** Running Evaluation *****
[INFO|trainer.py:4309] 2026-04-10 23:48:26,201 >> Num examples = 2339
[INFO|trainer.py:4312] 2026-04-10 23:48:26,201 >> Batch size = 16
100%|██████████| 18/18 [00:21<00:00, 1.24s/it]
{'eval_loss': 0.6636335253715515, 'eval_runtime': 22.4366, 'eval_samples_per_second': 104.249, 'eval_steps_per_second': 0.847, 'eval_rewards/chosen': -0.30329596996307373, 'eval_rewards/rejected': -0.38952386379241943, 'eval_rewards/accuracies': 0.6124131679534912, 'eval_rewards/margins': 0.08622786402702332, 'eval_logps/chosen': -127.64717864990234, 'eval_logps/rejected': -134.3017578125, 'eval_logps/ref_chosen': -87.82356262207031, 'eval_logps/ref_rejected': -82.81887817382812, 'eval_logits/chosen': -1.2261141538619995, 'eval_logits/rejected': -1.1807267665863037, 'eval_kl/p_epsilon_steps': 0.5759548544883728, 'eval_kl/n_epsilon_steps': 0.4236111044883728, 'epoch': 0.29}
29%|██▉ | 100/340 [04:55<11:05, 2.77s/it]
30%|██▉ | 101/340 [04:57<37:48, 9.49s/it]
30%|███ | 102/340 [05:00<29:36, 7.46s/it]
30%|███ | 103/340 [05:03<23:42, 6.00s/it]
31%|███ | 104/340 [05:05<19:43, 5.02s/it]
31%|███ | 105/340 [05:08<16:50, 4.30s/it]
{'loss': 0.6304, 'grad_norm': 4.206778049468994, 'learning_rate': 4.381713366536311e-07, 'rewards/chosen': -0.2697572112083435, 'rewards/rejected': -0.42430782318115234, 'rewards/accuracies': 0.699999988079071, 'rewards/margins': 0.15455064177513123, 'logps/chosen': -112.22574615478516, 'logps/rejected': -140.7528533935547, 'logps/ref_chosen': -76.5867919921875, 'logps/ref_rejected': -84.33440399169922, 'logits/chosen': -1.2459900379180908, 'logits/rejected': -1.1858142614364624, 'kl/p_epsilon_steps': 0.640625, 'kl/n_epsilon_steps': 0.359375, 'kl/beta': 0.007563448045402765, 'kl/avg_steps': 0.28125, 'epoch': 0.31}
31%|███ | 106/340 [05:11<14:49, 3.80s/it]
31%|███▏ | 107/340 [05:13<13:27, 3.47s/it]
32%|███▏ | 108/340 [05:16<12:30, 3.24s/it]
32%|███▏ | 109/340 [05:19<11:54, 3.09s/it]
32%|███▏ | 110/340 [05:21<11:23, 2.97s/it]
{'loss': 0.6294, 'grad_norm': 5.154345989227295, 'learning_rate': 4.2947798076611047e-07, 'rewards/chosen': -0.3029385209083557, 'rewards/rejected': -0.46550169587135315, 'rewards/accuracies': 0.692187488079071, 'rewards/margins': 0.16256316006183624, 'logps/chosen': -118.81462097167969, 'logps/rejected': -146.4515838623047, 'logps/ref_chosen': -78.16385650634766, 'logps/ref_rejected': -83.61200714111328, 'logits/chosen': -1.2248286008834839, 'logits/rejected': -1.1694958209991455, 'kl/p_epsilon_steps': 0.6421874761581421, 'kl/n_epsilon_steps': 0.3578124940395355, 'kl/beta': 0.007447557989507914, 'kl/avg_steps': 0.28437501192092896, 'epoch': 0.32}
33%|███▎ | 111/340 [05:24<10:58, 2.88s/it]
33%|███▎ | 112/340 [05:26<10:11, 2.68s/it]
33%|███▎ | 113/340 [05:29<10:02, 2.65s/it]
34%|███▎ | 114/340 [05:32<10:02, 2.67s/it]
34%|███▍ | 115/340 [05:34<10:00, 2.67s/it]
{'loss': 0.618, 'grad_norm': 5.148464679718018, 'learning_rate': 4.203117865141635e-07, 'rewards/chosen': -0.3210408687591553, 'rewards/rejected': -0.516320526599884, 'rewards/accuracies': 0.7265625, 'rewards/margins': 0.19527961313724518, 'logps/chosen': -118.66552734375, 'logps/rejected': -156.08580017089844, 'logps/ref_chosen': -74.8998031616211, 'logps/ref_rejected': -85.2784652709961, 'logits/chosen': -1.2170436382293701, 'logits/rejected': -1.1504008769989014, 'kl/p_epsilon_steps': 0.676562488079071, 'kl/n_epsilon_steps': 0.3218750059604645, 'kl/beta': 0.007336863782256842, 'kl/avg_steps': 0.35468751192092896, 'epoch': 0.34}
34%|███▍ | 116/340 [05:37<10:10, 2.72s/it]
34%|███▍ | 117/340 [05:40<10:07, 2.72s/it]
35%|███▍ | 118/340 [05:43<10:03, 2.72s/it]
35%|███▌ | 119/340 [05:45<10:01, 2.72s/it]
35%|███▌ | 120/340 [05:48<09:58, 2.72s/it]
{'loss': 0.6197, 'grad_norm': 5.226547718048096, 'learning_rate': 4.106969024216348e-07, 'rewards/chosen': -0.3315422534942627, 'rewards/rejected': -0.5241755247116089, 'rewards/accuracies': 0.698437511920929, 'rewards/margins': 0.19263319671154022, 'logps/chosen': -119.46983337402344, 'logps/rejected': -158.82113647460938, 'logps/ref_chosen': -73.58607482910156, 'logps/ref_rejected': -85.84365844726562, 'logits/chosen': -1.1989049911499023, 'logits/rejected': -1.1565752029418945, 'kl/p_epsilon_steps': 0.6328125, 'kl/n_epsilon_steps': 0.3671875, 'kl/beta': 0.007222268730401993, 'kl/avg_steps': 0.265625, 'epoch': 0.35}
36%|███▌ | 121/340 [05:51<09:45, 2.68s/it]
36%|███▌ | 122/340 [05:54<10:04, 2.77s/it]
36%|███▌ | 123/340 [05:56<10:01, 2.77s/it]
36%|███▋ | 124/340 [05:59<09:48, 2.73s/it]
37%|███▋ | 125/340 [06:01<09:26, 2.63s/it]
{'loss': 0.6139, 'grad_norm': 5.764974594116211, 'learning_rate': 4.006586590948141e-07, 'rewards/chosen': -0.3550013303756714, 'rewards/rejected': -0.5658854246139526, 'rewards/accuracies': 0.715624988079071, 'rewards/margins': 0.21088404953479767, 'logps/chosen': -130.13233947753906, 'logps/rejected': -161.29537963867188, 'logps/ref_chosen': -80.25770568847656, 'logps/ref_rejected': -81.34100341796875, 'logits/chosen': -1.1903568506240845, 'logits/rejected': -1.130084753036499, 'kl/p_epsilon_steps': 0.6546875238418579, 'kl/n_epsilon_steps': 0.3453125059604645, 'kl/beta': 0.007117821369320154, 'kl/avg_steps': 0.30937498807907104, 'epoch': 0.37}
37%|███▋ | 126/340 [06:04<09:19, 2.61s/it]
37%|███▋ | 127/340 [06:07<09:21, 2.64s/it]
38%|███▊ | 128/340 [06:09<09:23, 2.66s/it]
38%|███▊ | 129/340 [06:12<09:23, 2.67s/it]
38%|███▊ | 130/340 [06:15<09:23, 2.68s/it]
{'loss': 0.6209, 'grad_norm': 5.23793363571167, 'learning_rate': 3.9022350248844246e-07, 'rewards/chosen': -0.3804508149623871, 'rewards/rejected': -0.5780693292617798, 'rewards/accuracies': 0.6812499761581421, 'rewards/margins': 0.19761842489242554, 'logps/chosen': -128.90423583984375, 'logps/rejected': -167.0582733154297, 'logps/ref_chosen': -74.67902374267578, 'logps/ref_rejected': -84.1854019165039, 'logits/chosen': -1.1651326417922974, 'logits/rejected': -1.1263306140899658, 'kl/p_epsilon_steps': 0.635937511920929, 'kl/n_epsilon_steps': 0.36406248807907104, 'kl/beta': 0.007017888128757477, 'kl/avg_steps': 0.2718749940395355, 'epoch': 0.38}
39%|███▊ | 131/340 [06:17<09:06, 2.61s/it]
39%|███▉ | 132/340 [06:20<09:13, 2.66s/it]
39%|███▉ | 133/340 [06:23<09:04, 2.63s/it]
39%|███▉ | 134/340 [06:25<09:02, 2.63s/it]
40%|███▉ | 135/340 [06:28<09:03, 2.65s/it]
{'loss': 0.6207, 'grad_norm': 5.766234874725342, 'learning_rate': 3.794189242333106e-07, 'rewards/chosen': -0.39489540457725525, 'rewards/rejected': -0.5950329303741455, 'rewards/accuracies': 0.699999988079071, 'rewards/margins': 0.20013752579689026, 'logps/chosen': -138.46322631835938, 'logps/rejected': -174.3780059814453, 'logps/ref_chosen': -81.2975845336914, 'logps/ref_rejected': -87.74832916259766, 'logits/chosen': -1.1625608205795288, 'logits/rejected': -1.0963513851165771, 'kl/p_epsilon_steps': 0.6625000238418579, 'kl/n_epsilon_steps': 0.3375000059604645, 'kl/beta': 0.006909938994795084, 'kl/avg_steps': 0.32499998807907104, 'epoch': 0.4}
40%|████ | 136/340 [06:31<09:02, 2.66s/it]
40%|████ | 137/340 [06:33<08:49, 2.61s/it]
41%|████ | 138/340 [06:36<08:50, 2.62s/it]
41%|████ | 139/340 [06:38<08:45, 2.61s/it]
41%|████ | 140/340 [06:41<08:56, 2.68s/it]
{'loss': 0.6009, 'grad_norm': 4.888461112976074, 'learning_rate': 3.6827338920900253e-07, 'rewards/chosen': -0.3417351245880127, 'rewards/rejected': -0.5785812139511108, 'rewards/accuracies': 0.7281249761581421, 'rewards/margins': 0.2368461638689041, 'logps/chosen': -121.53498840332031, 'logps/rejected': -170.3011474609375, 'logps/ref_chosen': -71.20382690429688, 'logps/ref_rejected': -84.62137603759766, 'logits/chosen': -1.172863483428955, 'logits/rejected': -1.106768012046814, 'kl/p_epsilon_steps': 0.690625011920929, 'kl/n_epsilon_steps': 0.30781251192092896, 'kl/beta': 0.006796327419579029, 'kl/avg_steps': 0.3828125, 'epoch': 0.41}
41%|████▏ | 141/340 [06:44<08:56, 2.70s/it]
42%|████▏ | 142/340 [06:47<08:57, 2.72s/it]
42%|████▏ | 143/340 [06:49<08:53, 2.71s/it]
42%|████▏ | 144/340 [06:52<08:39, 2.65s/it]
43%|████▎ | 145/340 [06:55<08:38, 2.66s/it]
{'loss': 0.5992, 'grad_norm': 5.264912128448486, 'learning_rate': 3.568162605525952e-07, 'rewards/chosen': -0.3619559407234192, 'rewards/rejected': -0.6057869791984558, 'rewards/accuracies': 0.7421875, 'rewards/margins': 0.2438311129808426, 'logps/chosen': -132.38858032226562, 'logps/rejected': -178.3859405517578, 'logps/ref_chosen': -78.03334045410156, 'logps/ref_rejected': -86.95343017578125, 'logits/chosen': -1.1562573909759521, 'logits/rejected': -1.0977518558502197, 'kl/p_epsilon_steps': 0.6953125, 'kl/n_epsilon_steps': 0.3046875, 'kl/beta': 0.0066697075963020325, 'kl/avg_steps': 0.390625, 'epoch': 0.43}
43%|████▎ | 146/340 [06:57<08:40, 2.68s/it]
43%|████▎ | 147/340 [07:00<08:42, 2.71s/it]
44%|████▎ | 148/340 [07:03<08:37, 2.70s/it]
44%|████▍ | 149/340 [07:05<08:33, 2.69s/it]
44%|████▍ | 150/340 [07:08<08:32, 2.70s/it]
{'loss': 0.614, 'grad_norm': 5.746659278869629, 'learning_rate': 3.4507772230088147e-07, 'rewards/chosen': -0.4121219515800476, 'rewards/rejected': -0.6363847255706787, 'rewards/accuracies': 0.7015625238418579, 'rewards/margins': 0.22426274418830872, 'logps/chosen': -136.75088500976562, 'logps/rejected': -184.1068878173828, 'logps/ref_chosen': -73.69932556152344, 'logps/ref_rejected': -86.18521118164062, 'logits/chosen': -1.0893394947052002, 'logits/rejected': -1.0510880947113037, 'kl/p_epsilon_steps': 0.6640625, 'kl/n_epsilon_steps': 0.3359375, 'kl/beta': 0.00654013454914093, 'kl/avg_steps': 0.328125, 'epoch': 0.44}
44%|████▍ | 151/340 [07:11<08:28, 2.69s/it]
45%|████▍ | 152/340 [07:13<08:26, 2.69s/it]
45%|████▌ | 153/340 [07:16<08:23, 2.69s/it]
45%|████▌ | 154/340 [07:19<08:24, 2.71s/it]
46%|████▌ | 155/340 [07:22<08:19, 2.70s/it]
{'loss': 0.6118, 'grad_norm': 5.235478401184082, 'learning_rate': 3.3308869986991487e-07, 'rewards/chosen': -0.4207354485988617, 'rewards/rejected': -0.646741509437561, 'rewards/accuracies': 0.703125, 'rewards/margins': 0.22600603103637695, 'logps/chosen': -144.14666748046875, 'logps/rejected': -183.3446502685547, 'logps/ref_chosen': -78.81468963623047, 'logps/ref_rejected': -82.33976745605469, 'logits/chosen': -1.0995477437973022, 'logits/rejected': -1.031232237815857, 'kl/p_epsilon_steps': 0.671875, 'kl/n_epsilon_steps': 0.328125, 'kl/beta': 0.006440295372158289, 'kl/avg_steps': 0.34375, 'epoch': 0.46}
46%|████▌ | 156/340 [07:24<08:06, 2.64s/it]
46%|████▌ | 157/340 [07:27<08:04, 2.65s/it]
46%|████▋ | 158/340 [07:29<07:59, 2.64s/it]
47%|████▋ | 159/340 [07:32<07:56, 2.63s/it]
47%|████▋ | 160/340 [07:35<07:58, 2.66s/it]
{'loss': 0.5951, 'grad_norm': 5.473912239074707, 'learning_rate': 3.208807785813777e-07, 'rewards/chosen': -0.3846417963504791, 'rewards/rejected': -0.645238995552063, 'rewards/accuracies': 0.7203124761581421, 'rewards/margins': 0.26059722900390625, 'logps/chosen': -132.09349060058594, 'logps/rejected': -188.9801788330078, 'logps/ref_chosen': -71.280517578125, 'logps/ref_rejected': -86.39788818359375, 'logits/chosen': -1.080108880996704, 'logits/rejected': -1.0139106512069702, 'kl/p_epsilon_steps': 0.690625011920929, 'kl/n_epsilon_steps': 0.30937498807907104, 'kl/beta': 0.0063315341249108315, 'kl/avg_steps': 0.3812499940395355, 'epoch': 0.47}
47%|████▋ | 161/340 [07:37<07:56, 2.66s/it]
48%|████▊ | 162/340 [07:40<07:58, 2.69s/it]
48%|████▊ | 163/340 [07:43<08:00, 2.71s/it]
48%|████▊ | 164/340 [07:46<07:57, 2.71s/it]
49%|████▊ | 165/340 [07:48<07:53, 2.71s/it]
{'loss': 0.608, 'grad_norm': 5.492692947387695, 'learning_rate': 3.084861204504122e-07, 'rewards/chosen': -0.430931031703949, 'rewards/rejected': -0.6655236482620239, 'rewards/accuracies': 0.7093750238418579, 'rewards/margins': 0.23459258675575256, 'logps/chosen': -148.7730255126953, 'logps/rejected': -191.25628662109375, 'logps/ref_chosen': -79.35147094726562, 'logps/ref_rejected': -83.44163513183594, 'logits/chosen': -1.064668893814087, 'logits/rejected': -0.9995222091674805, 'kl/p_epsilon_steps': 0.6656249761581421, 'kl/n_epsilon_steps': 0.3343749940395355, 'kl/beta': 0.006211251951754093, 'kl/avg_steps': 0.33125001192092896, 'epoch': 0.49}
49%|████▉ | 166/340 [07:51<07:40, 2.65s/it]
49%|████▉ | 167/340 [07:53<07:29, 2.60s/it]
49%|████▉ | 168/340 [07:56<07:30, 2.62s/it]
50%|████▉ | 169/340 [07:58<07:22, 2.59s/it]
50%|█████ | 170/340 [08:01<07:23, 2.61s/it]
{'loss': 0.6032, 'grad_norm': 5.870633602142334, 'learning_rate': 2.959373794541426e-07, 'rewards/chosen': -0.4399870038032532, 'rewards/rejected': -0.6865519285202026, 'rewards/accuracies': 0.7124999761581421, 'rewards/margins': 0.2465648353099823, 'logps/chosen': -147.1262664794922, 'logps/rejected': -199.21173095703125, 'logps/ref_chosen': -75.01612854003906, 'logps/ref_rejected': -86.07945251464844, 'logits/chosen': -1.0475225448608398, 'logits/rejected': -1.006306529045105, 'kl/p_epsilon_steps': 0.6734374761581421, 'kl/n_epsilon_steps': 0.32343751192092896, 'kl/beta': 0.006105704233050346, 'kl/avg_steps': 0.3499999940395355, 'epoch': 0.5}
50%|█████ | 171/340 [08:04<07:26, 2.64s/it]
51%|█████ | 172/340 [08:07<07:26, 2.66s/it]
51%|█████ | 173/340 [08:09<07:26, 2.67s/it]
51%|█████ | 174/340 [08:12<07:13, 2.61s/it]
51%|█████▏ | 175/340 [08:14<07:12, 2.62s/it]
{'loss': 0.5969, 'grad_norm': 5.422708988189697, 'learning_rate': 2.8326761550411346e-07, 'rewards/chosen': -0.4419892430305481, 'rewards/rejected': -0.7005925178527832, 'rewards/accuracies': 0.729687511920929, 'rewards/margins': 0.2586033344268799, 'logps/chosen': -149.66494750976562, 'logps/rejected': -206.0808563232422, 'logps/ref_chosen': -75.85931396484375, 'logps/ref_rejected': -88.4763412475586, 'logits/chosen': -1.037719488143921, 'logits/rejected': -0.9720247387886047, 'kl/p_epsilon_steps': 0.7046874761581421, 'kl/n_epsilon_steps': 0.29374998807907104, 'kl/beta': 0.0059935590252280235, 'kl/avg_steps': 0.41093748807907104, 'epoch': 0.51}
52%|█████▏ | 176/340 [08:17<07:14, 2.65s/it]
52%|█████▏ | 177/340 [08:20<07:14, 2.67s/it]
52%|█████▏ | 178/340 [08:23<07:14, 2.68s/it]
53%|█████▎ | 179/340 [08:25<07:12, 2.69s/it]
53%|█████▎ | 180/340 [08:28<07:07, 2.67s/it]
{'loss': 0.6093, 'grad_norm': 5.140402793884277, 'learning_rate': 2.7051020734928443e-07, 'rewards/chosen': -0.4056355059146881, 'rewards/rejected': -0.6422106027603149, 'rewards/accuracies': 0.692187488079071, 'rewards/margins': 0.23657508194446564, 'logps/chosen': -143.4625701904297, 'logps/rejected': -188.22622680664062, 'logps/ref_chosen': -74.5296859741211, 'logps/ref_rejected': -78.44059753417969, 'logits/chosen': -1.0452353954315186, 'logits/rejected': -0.968549370765686, 'kl/p_epsilon_steps': 0.6546875238418579, 'kl/n_epsilon_steps': 0.3453125059604645, 'kl/beta': 0.005884683690965176, 'kl/avg_steps': 0.30937498807907104, 'epoch': 0.53}
53%|█████▎ | 181/340 [08:31<07:08, 2.70s/it]
54%|█████▎ | 182/340 [08:33<07:05, 2.69s/it]
54%|█████▍ | 183/340 [08:36<07:07, 2.72s/it]
54%|█████▍ | 184/340 [08:39<07:11, 2.77s/it]
54%|█████▍ | 185/340 [08:42<07:07, 2.76s/it]
{'loss': 0.5968, 'grad_norm': 5.03032112121582, 'learning_rate': 2.5769876463904263e-07, 'rewards/chosen': -0.3904454708099365, 'rewards/rejected': -0.6427868008613586, 'rewards/accuracies': 0.7328125238418579, 'rewards/margins': 0.25234130024909973, 'logps/chosen': -137.92031860351562, 'logps/rejected': -197.1123809814453, 'logps/ref_chosen': -70.28861999511719, 'logps/ref_rejected': -85.20851135253906, 'logits/chosen': -1.0298566818237305, 'logits/rejected': -0.9755008816719055, 'kl/p_epsilon_steps': 0.698437511920929, 'kl/n_epsilon_steps': 0.30000001192092896, 'kl/beta': 0.005778872407972813, 'kl/avg_steps': 0.3984375, 'epoch': 0.54}
55%|█████▍ | 186/340 [08:44<07:04, 2.76s/it]
55%|█████▌ | 187/340 [08:47<07:01, 2.75s/it]
55%|█████▌ | 188/340 [08:50<06:54, 2.73s/it]
56%|█████▌ | 189/340 [08:52<06:43, 2.67s/it]
56%|█████▌ | 190/340 [08:55<06:44, 2.70s/it]
{'loss': 0.5951, 'grad_norm': 6.057910919189453, 'learning_rate': 2.4486703937790243e-07, 'rewards/chosen': -0.43261224031448364, 'rewards/rejected': -0.7007459402084351, 'rewards/accuracies': 0.731249988079071, 'rewards/margins': 0.2681336998939514, 'logps/chosen': -151.2794952392578, 'logps/rejected': -214.67868041992188, 'logps/ref_chosen': -75.0217514038086, 'logps/ref_rejected': -90.4836654663086, 'logits/chosen': -1.0044220685958862, 'logits/rejected': -0.9527886509895325, 'kl/p_epsilon_steps': 0.6796875, 'kl/n_epsilon_steps': 0.3203125, 'kl/beta': 0.005678877234458923, 'kl/avg_steps': 0.359375, 'epoch': 0.56}
56%|█████▌ | 191/340 [08:58<06:42, 2.70s/it]
56%|█████▋ | 192/340 [09:00<06:33, 2.66s/it]
57%|█████▋ | 193/340 [09:03<06:31, 2.67s/it]
57%|█████▋ | 194/340 [09:06<06:36, 2.71s/it]
57%|█████▋ | 195/340 [09:08<06:26, 2.66s/it]
{'loss': 0.6019, 'grad_norm': 5.573934555053711, 'learning_rate': 2.320488370051681e-07, 'rewards/chosen': -0.4517548084259033, 'rewards/rejected': -0.7048214673995972, 'rewards/accuracies': 0.721875011920929, 'rewards/margins': 0.25306665897369385, 'logps/chosen': -154.51953125, 'logps/rejected': -211.646240234375, 'logps/ref_chosen': -73.42979431152344, 'logps/ref_rejected': -84.43408203125, 'logits/chosen': -0.989575207233429, 'logits/rejected': -0.9092248678207397, 'kl/p_epsilon_steps': 0.675000011920929, 'kl/n_epsilon_steps': 0.32499998807907104, 'kl/beta': 0.005573070142418146, 'kl/avg_steps': 0.3499999940395355, 'epoch': 0.57}
58%|█████▊ | 196/340 [09:11<06:21, 2.65s/it]
58%|█████▊ | 197/340 [09:14<06:25, 2.70s/it]
58%|█████▊ | 198/340 [09:16<06:15, 2.65s/it]
59%|█████▊ | 199/340 [09:19<06:14, 2.66s/it]
59%|█████▉ | 200/340 [09:22<06:06, 2.62s/it]
{'loss': 0.5934, 'grad_norm': 5.598110198974609, 'learning_rate': 2.192779273338215e-07, 'rewards/chosen': -0.4459984302520752, 'rewards/rejected': -0.7237256765365601, 'rewards/accuracies': 0.7109375, 'rewards/margins': 0.27772727608680725, 'logps/chosen': -159.2919464111328, 'logps/rejected': -219.63626098632812, 'logps/ref_chosen': -77.8104019165039, 'logps/ref_rejected': -86.66553497314453, 'logits/chosen': -0.9810283780097961, 'logits/rejected': -0.8921745419502258, 'kl/p_epsilon_steps': 0.676562488079071, 'kl/n_epsilon_steps': 0.3218750059604645, 'kl/beta': 0.005477838683873415, 'kl/avg_steps': 0.35468751192092896, 'epoch': 0.59}
[INFO|trainer.py:4307] 2026-04-10 23:53:15,687 >>
***** Running Evaluation *****
[INFO|trainer.py:4309] 2026-04-10 23:53:15,687 >> Num examples = 2339
[INFO|trainer.py:4312] 2026-04-10 23:53:15,688 >> Batch size = 16
100%|██████████| 18/18 [00:21<00:00, 1.23s/it]
{'eval_loss': 0.6429124474525452, 'eval_runtime': 22.339, 'eval_samples_per_second': 104.705, 'eval_steps_per_second': 0.851, 'eval_rewards/chosen': -0.5156466960906982, 'eval_rewards/rejected': -0.6819863319396973, 'eval_rewards/accuracies': 0.6323784589767456, 'eval_rewards/margins': 0.1663396805524826, 'eval_logps/chosen': -182.90843200683594, 'eval_logps/rejected': -209.30340576171875, 'eval_logps/ref_chosen': -87.82356262207031, 'eval_logps/ref_rejected': -82.81887817382812, 'eval_logits/chosen': -0.9817464351654053, 'eval_logits/rejected': -0.8951107859611511, 'eval_kl/p_epsilon_steps': 0.6037326455116272, 'eval_kl/n_epsilon_steps': 0.3958333432674408, 'epoch': 0.59}
59%|█████▉ | 200/340 [09:44<06:06, 2.62s/it]
[INFO|trainer.py:3984] 2026-04-10 23:53:52,882 >> Saving model checkpoint to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-200
[INFO|configuration_utils.py:419] 2026-04-10 23:53:52,889 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-200/config.json
[INFO|configuration_utils.py:911] 2026-04-10 23:53:52,895 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-200/generation_config.json
[INFO|modeling_utils.py:3580] 2026-04-10 23:54:34,634 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 6 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-200/model.safetensors.index.json.
[INFO|tokenization_utils_base.py:2510] 2026-04-10 23:54:34,644 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-200/tokenizer_config.json
[INFO|tokenization_utils_base.py:2519] 2026-04-10 23:54:34,649 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-200/special_tokens_map.json
59%|█████▉ | 201/340 [13:53<3:13:02, 83.33s/it]
59%|█████▉ | 202/340 [13:56<2:15:53, 59.09s/it]
60%|█████▉ | 203/340 [13:58<1:36:07, 42.10s/it]
60%|██████ | 204/340 [14:01<1:08:37, 30.28s/it]
60%|██████ | 205/340 [14:04<49:31, 22.01s/it]
{'loss': 0.5976, 'grad_norm': 5.395305156707764, 'learning_rate': 2.065879555832674e-07, 'rewards/chosen': -0.42064207792282104, 'rewards/rejected': -0.6905413866043091, 'rewards/accuracies': 0.7015625238418579, 'rewards/margins': 0.26989927887916565, 'logps/chosen': -150.00833129882812, 'logps/rejected': -207.3239288330078, 'logps/ref_chosen': -71.83072662353516, 'logps/ref_rejected': -78.26126861572266, 'logits/chosen': -0.9339988827705383, 'logits/rejected': -0.8349924087524414, 'kl/p_epsilon_steps': 0.6578124761581421, 'kl/n_epsilon_steps': 0.3421874940395355, 'kl/beta': 0.005382629111409187, 'kl/avg_steps': 0.31562501192092896, 'epoch': 0.6}
61%|██████ | 206/340 [14:06<35:58, 16.11s/it]
61%|██████ | 207/340 [14:09<26:53, 12.13s/it]
61%|██████ | 208/340 [14:12<20:29, 9.32s/it]
61%|██████▏ | 209/340 [14:14<15:54, 7.29s/it]
62%|██████▏ | 210/340 [14:17<12:43, 5.87s/it]
{'loss': 0.5961, 'grad_norm': 8.636336326599121, 'learning_rate': 1.9401235374032425e-07, 'rewards/chosen': -0.4700423777103424, 'rewards/rejected': -0.7500611543655396, 'rewards/accuracies': 0.7124999761581421, 'rewards/margins': 0.28001874685287476, 'logps/chosen': -169.9760284423828, 'logps/rejected': -226.44479370117188, 'logps/ref_chosen': -81.13362121582031, 'logps/ref_rejected': -83.91246032714844, 'logits/chosen': -0.940881073474884, 'logits/rejected': -0.835827648639679, 'kl/p_epsilon_steps': 0.667187511920929, 'kl/n_epsilon_steps': 0.33281248807907104, 'kl/beta': 0.005294554866850376, 'kl/avg_steps': 0.3343749940395355, 'epoch': 0.62}
62%|██████▏ | 211/340 [14:19<10:35, 4.93s/it]
62%|██████▏ | 212/340 [14:22<09:05, 4.26s/it]
63%|██████▎ | 213/340 [14:25<08:03, 3.81s/it]
63%|██████▎ | 214/340 [14:28<07:19, 3.49s/it]
63%|██████▎ | 215/340 [14:30<06:48, 3.27s/it]
{'loss': 0.5994, 'grad_norm': 5.697958946228027, 'learning_rate': 1.8158425248197928e-07, 'rewards/chosen': -0.4653104245662689, 'rewards/rejected': -0.7343412637710571, 'rewards/accuracies': 0.737500011920929, 'rewards/margins': 0.2690308690071106, 'logps/chosen': -168.97909545898438, 'logps/rejected': -225.5334014892578, 'logps/ref_chosen': -79.5214614868164, 'logps/ref_rejected': -83.58778381347656, 'logits/chosen': -0.9595499038696289, 'logits/rejected': -0.8254610300064087, 'kl/p_epsilon_steps': 0.6890624761581421, 'kl/n_epsilon_steps': 0.30937498807907104, 'kl/beta': 0.005207170732319355, 'kl/avg_steps': 0.37968748807907104, 'epoch': 0.63}
64%|██████▎ | 216/340 [14:33<06:25, 3.11s/it]
64%|██████▍ | 217/340 [14:36<06:12, 3.03s/it]
64%|██████▍ | 218/340 [14:39<05:55, 2.91s/it]
64%|██████▍ | 219/340 [14:41<05:44, 2.85s/it]
65%|██████▍ | 220/340 [14:44<05:36, 2.80s/it]
{'loss': 0.6056, 'grad_norm': 5.304469108581543, 'learning_rate': 1.6933639389195134e-07, 'rewards/chosen': -0.43559327721595764, 'rewards/rejected': -0.6726005673408508, 'rewards/accuracies': 0.723437488079071, 'rewards/margins': 0.2370072603225708, 'logps/chosen': -166.537353515625, 'logps/rejected': -215.3668670654297, 'logps/ref_chosen': -81.25938415527344, 'logps/ref_rejected': -83.04185485839844, 'logits/chosen': -0.9539089202880859, 'logits/rejected': -0.8665965795516968, 'kl/p_epsilon_steps': 0.667187511920929, 'kl/n_epsilon_steps': 0.33281248807907104, 'kl/beta': 0.005111886188387871, 'kl/avg_steps': 0.3343749940395355, 'epoch': 0.65}
65%|██████▌ | 221/340 [14:47<05:33, 2.80s/it]
65%|██████▌ | 222/340 [14:50<05:29, 2.80s/it]
66%|██████▌ | 223/340 [14:52<05:23, 2.77s/it]
66%|██████▌ | 224/340 [14:55<05:13, 2.70s/it]
66%|██████▌ | 225/340 [14:58<05:07, 2.68s/it]
{'loss': 0.5839, 'grad_norm': 5.622444152832031, 'learning_rate': 1.573010452010098e-07, 'rewards/chosen': -0.4237908720970154, 'rewards/rejected': -0.7200239896774292, 'rewards/accuracies': 0.765625, 'rewards/margins': 0.2962331175804138, 'logps/chosen': -162.01535034179688, 'logps/rejected': -233.6844024658203, 'logps/ref_chosen': -77.427001953125, 'logps/ref_rejected': -89.23592376708984, 'logits/chosen': -0.9484726190567017, 'logits/rejected': -0.8518384695053101, 'kl/p_epsilon_steps': 0.723437488079071, 'kl/n_epsilon_steps': 0.27656251192092896, 'kl/beta': 0.005018714815378189, 'kl/avg_steps': 0.4468750059604645, 'epoch': 0.66}
66%|██████▋ | 226/340 [15:00<05:06, 2.69s/it]
67%|██████▋ | 227/340 [15:03<05:02, 2.68s/it]
67%|██████▋ | 228/340 [15:06<05:01, 2.69s/it]
67%|██████▋ | 229/340 [15:08<05:04, 2.74s/it]
68%|██████▊ | 230/340 [15:11<05:01, 2.74s/it]
{'loss': 0.5866, 'grad_norm': 5.60673189163208, 'learning_rate': 1.4550991377830423e-07, 'rewards/chosen': -0.42099839448928833, 'rewards/rejected': -0.7057094573974609, 'rewards/accuracies': 0.7671874761581421, 'rewards/margins': 0.2847110629081726, 'logps/chosen': -156.29066467285156, 'logps/rejected': -232.82858276367188, 'logps/ref_chosen': -70.1819839477539, 'logps/ref_rejected': -87.79248046875, 'logits/chosen': -0.9383388757705688, 'logits/rejected': -0.856258749961853, 'kl/p_epsilon_steps': 0.7406250238418579, 'kl/n_epsilon_steps': 0.2593750059604645, 'kl/beta': 0.004900630097836256, 'kl/avg_steps': 0.48124998807907104, 'epoch': 0.68}
68%|██████▊ | 231/340 [15:14<04:57, 2.73s/it]
68%|██████▊ | 232/340 [15:16<04:47, 2.67s/it]
69%|██████▊ | 233/340 [15:19<04:47, 2.68s/it]
69%|██████▉ | 234/340 [15:22<04:45, 2.69s/it]
69%|██████▉ | 235/340 [15:25<04:45, 2.72s/it]
{'loss': 0.583, 'grad_norm': 5.7163004875183105, 'learning_rate': 1.339940635976592e-07, 'rewards/chosen': -0.4634285569190979, 'rewards/rejected': -0.7673560976982117, 'rewards/accuracies': 0.7734375, 'rewards/margins': 0.30392760038375854, 'logps/chosen': -174.5547637939453, 'logps/rejected': -251.2958526611328, 'logps/ref_chosen': -77.51251220703125, 'logps/ref_rejected': -89.81958770751953, 'logits/chosen': -0.8863986134529114, 'logits/rejected': -0.8059118390083313, 'kl/p_epsilon_steps': 0.7406250238418579, 'kl/n_epsilon_steps': 0.2578125, 'kl/beta': 0.004785512108355761, 'kl/avg_steps': 0.4828124940395355, 'epoch': 0.69}
69%|██████▉ | 236/340 [15:27<04:44, 2.74s/it]
70%|██████▉ | 237/340 [15:30<04:42, 2.74s/it]
70%|███████ | 238/340 [15:33<04:38, 2.73s/it]
70%|███████ | 239/340 [15:36<04:35, 2.72s/it]
71%|███████ | 240/340 [15:38<04:30, 2.71s/it]
{'loss': 0.5968, 'grad_norm': 6.860780715942383, 'learning_rate': 1.227838333989088e-07, 'rewards/chosen': -0.4815599322319031, 'rewards/rejected': -0.7540072202682495, 'rewards/accuracies': 0.715624988079071, 'rewards/margins': 0.27244722843170166, 'logps/chosen': -177.47744750976562, 'logps/rejected': -243.76736450195312, 'logps/ref_chosen': -74.5803451538086, 'logps/ref_rejected': -81.81297302246094, 'logits/chosen': -0.8450605273246765, 'logits/rejected': -0.7272099256515503, 'kl/p_epsilon_steps': 0.6812499761581421, 'kl/n_epsilon_steps': 0.3187499940395355, 'kl/beta': 0.004683743230998516, 'kl/avg_steps': 0.36250001192092896, 'epoch': 0.71}
71%|███████ | 241/340 [15:41<04:32, 2.75s/it]
71%|███████ | 242/340 [15:44<04:22, 2.68s/it]
71%|███████▏ | 243/340 [15:46<04:17, 2.65s/it]
72%|███████▏ | 244/340 [15:49<04:15, 2.66s/it]
72%|███████▏ | 245/340 [15:52<04:15, 2.69s/it]
{'loss': 0.5827, 'grad_norm': 5.382650852203369, 'learning_rate': 1.1190875675987355e-07, 'rewards/chosen': -0.46838441491127014, 'rewards/rejected': -0.7710477113723755, 'rewards/accuracies': 0.7281249761581421, 'rewards/margins': 0.30266332626342773, 'logps/chosen': -178.53826904296875, 'logps/rejected': -255.5751495361328, 'logps/ref_chosen': -76.56635284423828, 'logps/ref_rejected': -86.859130859375, 'logits/chosen': -0.8378638029098511, 'logits/rejected': -0.7307332158088684, 'kl/p_epsilon_steps': 0.699999988079071, 'kl/n_epsilon_steps': 0.30000001192092896, 'kl/beta': 0.004598929081112146, 'kl/avg_steps': 0.4000000059604645, 'epoch': 0.72}
72%|███████▏ | 246/340 [15:54<04:13, 2.69s/it]
73%|███████▎ | 247/340 [15:57<04:13, 2.72s/it]
73%|███████▎ | 248/340 [16:00<04:09, 2.71s/it]
73%|███████▎ | 249/340 [16:02<04:06, 2.71s/it]
74%|███████▎ | 250/340 [16:05<04:02, 2.70s/it]
{'loss': 0.6155, 'grad_norm': 5.63203763961792, 'learning_rate': 1.0139748428955333e-07, 'rewards/chosen': -0.4800783693790436, 'rewards/rejected': -0.7040629982948303, 'rewards/accuracies': 0.706250011920929, 'rewards/margins': 0.22398455440998077, 'logps/chosen': -183.86294555664062, 'logps/rejected': -237.01980590820312, 'logps/ref_chosen': -77.37183380126953, 'logps/ref_rejected': -79.96475219726562, 'logits/chosen': -0.8333392143249512, 'logits/rejected': -0.7355720400810242, 'kl/p_epsilon_steps': 0.675000011920929, 'kl/n_epsilon_steps': 0.32499998807907104, 'kl/beta': 0.00451111001893878, 'kl/avg_steps': 0.3499999940395355, 'epoch': 0.74}
74%|███████▍ | 251/340 [16:08<04:03, 2.74s/it]
74%|███████▍ | 252/340 [16:11<04:00, 2.74s/it]
74%|███████▍ | 253/340 [16:13<03:54, 2.70s/it]
75%|███████▍ | 254/340 [16:16<03:54, 2.72s/it]
75%|███████▌ | 255/340 [16:19<03:51, 2.72s/it]
{'loss': 0.6013, 'grad_norm': 5.822533130645752, 'learning_rate': 9.127770814751932e-08, 'rewards/chosen': -0.46239757537841797, 'rewards/rejected': -0.7161475419998169, 'rewards/accuracies': 0.699999988079071, 'rewards/margins': 0.25374993681907654, 'logps/chosen': -184.06822204589844, 'logps/rejected': -246.44998168945312, 'logps/ref_chosen': -79.62632751464844, 'logps/ref_rejected': -83.8196792602539, 'logits/chosen': -0.8416454195976257, 'logits/rejected': -0.7227948904037476, 'kl/p_epsilon_steps': 0.6781250238418579, 'kl/n_epsilon_steps': 0.3218750059604645, 'kl/beta': 0.004430105909705162, 'kl/avg_steps': 0.35624998807907104, 'epoch': 0.75}
75%|███████▌ | 256/340 [16:22<03:48, 2.72s/it]
76%|███████▌ | 257/340 [16:24<03:46, 2.73s/it]
76%|███████▌ | 258/340 [16:27<03:40, 2.69s/it]
76%|███████▌ | 259/340 [16:30<03:38, 2.70s/it]
76%|███████▋ | 260/340 [16:32<03:36, 2.70s/it]
{'loss': 0.6056, 'grad_norm': 5.885540008544922, 'learning_rate': 8.15760890883607e-08, 'rewards/chosen': -0.4556017816066742, 'rewards/rejected': -0.6967115998268127, 'rewards/accuracies': 0.7124999761581421, 'rewards/margins': 0.2411097288131714, 'logps/chosen': -184.8510284423828, 'logps/rejected': -246.5211639404297, 'logps/ref_chosen': -80.03411865234375, 'logps/ref_rejected': -85.39453125, 'logits/chosen': -0.8616160154342651, 'logits/rejected': -0.7643041610717773, 'kl/p_epsilon_steps': 0.684374988079071, 'kl/n_epsilon_steps': 0.31562501192092896, 'kl/beta': 0.004350547678768635, 'kl/avg_steps': 0.3687500059604645, 'epoch': 0.76}
77%|███████▋ | 261/340 [16:35<03:32, 2.69s/it]
77%|███████▋ | 262/340 [16:38<03:34, 2.75s/it]
77%|███████▋ | 263/340 [16:41<03:30, 2.74s/it]
78%|███████▊ | 264/340 [16:43<03:28, 2.74s/it]
78%|███████▊ | 265/340 [16:46<03:24, 2.73s/it]
{'loss': 0.603, 'grad_norm': 5.461711883544922, 'learning_rate': 7.231818622338822e-08, 'rewards/chosen': -0.432711660861969, 'rewards/rejected': -0.6719975471496582, 'rewards/accuracies': 0.7109375, 'rewards/margins': 0.2392859160900116, 'logps/chosen': -178.0113067626953, 'logps/rejected': -238.1660614013672, 'logps/ref_chosen': -76.63539123535156, 'logps/ref_rejected': -79.94613647460938, 'logits/chosen': -0.8387966156005859, 'logits/rejected': -0.7239198088645935, 'kl/p_epsilon_steps': 0.676562488079071, 'kl/n_epsilon_steps': 0.32343751192092896, 'kl/beta': 0.0042734695598483086, 'kl/avg_steps': 0.3531250059604645, 'epoch': 0.78}
78%|███████▊ | 266/340 [16:49<03:23, 2.74s/it]
79%|███████▊ | 267/340 [16:52<03:19, 2.73s/it]
79%|███████▉ | 268/340 [16:54<03:15, 2.71s/it]
79%|███████▉ | 269/340 [16:57<03:11, 2.69s/it]
79%|███████▉ | 270/340 [17:00<03:10, 2.72s/it]
{'loss': 0.6021, 'grad_norm': 5.6931915283203125, 'learning_rate': 6.352838968463919e-08, 'rewards/chosen': -0.4092663824558258, 'rewards/rejected': -0.6513173580169678, 'rewards/accuracies': 0.731249988079071, 'rewards/margins': 0.24205096065998077, 'logps/chosen': -173.6400604248047, 'logps/rejected': -236.97607421875, 'logps/ref_chosen': -76.02762603759766, 'logps/ref_rejected': -80.83404541015625, 'logits/chosen': -0.8596851229667664, 'logits/rejected': -0.7193423509597778, 'kl/p_epsilon_steps': 0.698437511920929, 'kl/n_epsilon_steps': 0.30156248807907104, 'kl/beta': 0.004198429174721241, 'kl/avg_steps': 0.3968749940395355, 'epoch': 0.79}
80%|███████▉ | 271/340 [17:02<03:07, 2.71s/it]
80%|████████ | 272/340 [17:05<03:04, 2.71s/it]
80%|████████ | 273/340 [17:08<03:01, 2.70s/it]
81%|████████ | 274/340 [17:11<03:01, 2.75s/it]
81%|████████ | 275/340 [17:13<02:58, 2.75s/it]
{'loss': 0.5997, 'grad_norm': 5.091865062713623, 'learning_rate': 5.5229856368582376e-08, 'rewards/chosen': -0.4245019555091858, 'rewards/rejected': -0.6765104532241821, 'rewards/accuracies': 0.739062488079071, 'rewards/margins': 0.25200843811035156, 'logps/chosen': -180.9755859375, 'logps/rejected': -254.04690551757812, 'logps/ref_chosen': -77.58733367919922, 'logps/ref_rejected': -88.50263214111328, 'logits/chosen': -0.8502656817436218, 'logits/rejected': -0.7681766748428345, 'kl/p_epsilon_steps': 0.703125, 'kl/n_epsilon_steps': 0.296875, 'kl/beta': 0.004111775197088718, 'kl/avg_steps': 0.40625, 'epoch': 0.81}
81%|████████ | 276/340 [17:16<02:51, 2.68s/it]
81%|████████▏ | 277/340 [17:19<02:49, 2.69s/it]
82%|████████▏ | 278/340 [17:21<02:47, 2.69s/it]
82%|████████▏ | 279/340 [17:24<02:42, 2.67s/it]
82%|████████▏ | 280/340 [17:27<02:42, 2.70s/it]
{'loss': 0.5958, 'grad_norm': 5.737886905670166, 'learning_rate': 4.7444448928806615e-08, 'rewards/chosen': -0.4230107367038727, 'rewards/rejected': -0.6823617219924927, 'rewards/accuracies': 0.729687511920929, 'rewards/margins': 0.2593509256839752, 'logps/chosen': -186.74009704589844, 'logps/rejected': -265.301025390625, 'logps/ref_chosen': -81.46415710449219, 'logps/ref_rejected': -94.69911193847656, 'logits/chosen': -0.876409649848938, 'logits/rejected': -0.7702105641365051, 'kl/p_epsilon_steps': 0.7124999761581421, 'kl/n_epsilon_steps': 0.2874999940395355, 'kl/beta': 0.004024769179522991, 'kl/avg_steps': 0.42500001192092896, 'epoch': 0.82}
83%|████████▎ | 281/340 [17:29<02:40, 2.71s/it]
83%|████████▎ | 282/340 [17:32<02:37, 2.72s/it]
83%|████████▎ | 283/340 [17:35<02:35, 2.73s/it]
84%|████████▎ | 284/340 [17:38<02:32, 2.72s/it]
84%|████████▍ | 285/340 [17:40<02:30, 2.73s/it]
{'loss': 0.6036, 'grad_norm': 5.05964469909668, 'learning_rate': 4.019267817841834e-08, 'rewards/chosen': -0.41665568947792053, 'rewards/rejected': -0.6515552997589111, 'rewards/accuracies': 0.731249988079071, 'rewards/margins': 0.23489956557750702, 'logps/chosen': -183.66696166992188, 'logps/rejected': -251.9569854736328, 'logps/ref_chosen': -77.9266128540039, 'logps/ref_rejected': -85.77226257324219, 'logits/chosen': -0.8164280652999878, 'logits/rejected': -0.7374383211135864, 'kl/p_epsilon_steps': 0.699999988079071, 'kl/n_epsilon_steps': 0.30000001192092896, 'kl/beta': 0.003945710603147745, 'kl/avg_steps': 0.4000000059604645, 'epoch': 0.84}
84%|████████▍ | 286/340 [17:43<02:27, 2.72s/it]
84%|████████▍ | 287/340 [17:46<02:23, 2.71s/it]
85%|████████▍ | 288/340 [17:48<02:20, 2.71s/it]
85%|████████▌ | 289/340 [17:51<02:17, 2.70s/it]
85%|████████▌ | 290/340 [17:54<02:14, 2.69s/it]
{'loss': 0.6008, 'grad_norm': 4.995400428771973, 'learning_rate': 3.349364905389032e-08, 'rewards/chosen': -0.41073670983314514, 'rewards/rejected': -0.6532593965530396, 'rewards/accuracies': 0.7250000238418579, 'rewards/margins': 0.24252267181873322, 'logps/chosen': -178.77645874023438, 'logps/rejected': -253.64126586914062, 'logps/ref_chosen': -72.49942016601562, 'logps/ref_rejected': -83.77849578857422, 'logits/chosen': -0.788993775844574, 'logits/rejected': -0.7104808688163757, 'kl/p_epsilon_steps': 0.6859375238418579, 'kl/n_epsilon_steps': 0.3140625059604645, 'kl/beta': 0.003868584055453539, 'kl/avg_steps': 0.37187498807907104, 'epoch': 0.85}
86%|████████▌ | 291/340 [17:56<02:12, 2.70s/it]
86%|████████▌ | 292/340 [17:59<02:09, 2.70s/it]
86%|████████▌ | 293/340 [18:02<02:07, 2.71s/it]
86%|████████▋ | 294/340 [18:05<02:02, 2.67s/it]
87%|████████▋ | 295/340 [18:07<02:00, 2.68s/it]
{'loss': 0.6044, 'grad_norm': 5.24601411819458, 'learning_rate': 2.736501028272095e-08, 'rewards/chosen': -0.4162219166755676, 'rewards/rejected': -0.6554089784622192, 'rewards/accuracies': 0.739062488079071, 'rewards/margins': 0.23918703198432922, 'logps/chosen': -182.55836486816406, 'logps/rejected': -265.3271789550781, 'logps/ref_chosen': -72.81735229492188, 'logps/ref_rejected': -91.62478637695312, 'logits/chosen': -0.7796621918678284, 'logits/rejected': -0.7315692901611328, 'kl/p_epsilon_steps': 0.699999988079071, 'kl/n_epsilon_steps': 0.30000001192092896, 'kl/beta': 0.0037986349780112505, 'kl/avg_steps': 0.4000000059604645, 'epoch': 0.87}
87%|████████▋ | 296/340 [18:10<01:57, 2.68s/it]
87%|████████▋ | 297/340 [18:13<01:55, 2.68s/it]
88%|████████▊ | 298/340 [18:15<01:53, 2.70s/it]
88%|████████▊ | 299/340 [18:18<01:50, 2.69s/it]
88%|████████▊ | 300/340 [18:21<01:49, 2.74s/it]
{'loss': 0.6156, 'grad_norm': 5.059004306793213, 'learning_rate': 2.1822907887504932e-08, 'rewards/chosen': -0.40281882882118225, 'rewards/rejected': -0.6089481115341187, 'rewards/accuracies': 0.7265625, 'rewards/margins': 0.20612934231758118, 'logps/chosen': -178.6937255859375, 'logps/rejected': -241.739013671875, 'logps/ref_chosen': -70.4697265625, 'logps/ref_rejected': -77.26274108886719, 'logits/chosen': -0.7761374711990356, 'logits/rejected': -0.6450864672660828, 'kl/p_epsilon_steps': 0.6968749761581421, 'kl/n_epsilon_steps': 0.3031249940395355, 'kl/beta': 0.003725191578269005, 'kl/avg_steps': 0.39375001192092896, 'epoch': 0.88}
[INFO|trainer.py:4307] 2026-04-11 00:02:14,896 >>
***** Running Evaluation *****
[INFO|trainer.py:4309] 2026-04-11 00:02:14,896 >> Num examples = 2339
[INFO|trainer.py:4312] 2026-04-11 00:02:14,896 >> Batch size = 16
100%|██████████| 18/18 [00:21<00:00, 1.24s/it]
{'eval_loss': 0.6442785263061523, 'eval_runtime': 22.3967, 'eval_samples_per_second': 104.435, 'eval_steps_per_second': 0.848, 'eval_rewards/chosen': -0.44304588437080383, 'eval_rewards/rejected': -0.5884472131729126, 'eval_rewards/accuracies': 0.6401909589767456, 'eval_rewards/margins': 0.14540132880210876, 'eval_logps/chosen': -208.17991638183594, 'eval_logps/rejected': -243.57456970214844, 'eval_logps/ref_chosen': -87.82356262207031, 'eval_logps/ref_rejected': -82.81887817382812, 'eval_logits/chosen': -0.8325175046920776, 'eval_logits/rejected': -0.7259347438812256, 'eval_kl/p_epsilon_steps': 0.6150173544883728, 'eval_kl/n_epsilon_steps': 0.3849826455116272, 'epoch': 0.88}
88%|████████▊ | 300/340 [18:43<01:49, 2.74s/it]
89%|████████▊ | 301/340 [18:46<06:09, 9.47s/it]
89%|████████▉ | 302/340 [18:49<04:42, 7.43s/it]
89%|████████▉ | 303/340 [18:51<03:42, 6.01s/it]
89%|████████▉ | 304/340 [18:54<03:00, 5.00s/it]
90%|████████▉ | 305/340 [18:56<02:27, 4.21s/it]
{'loss': 0.6049, 'grad_norm': 5.704087734222412, 'learning_rate': 1.6881942648911074e-08, 'rewards/chosen': -0.3862135410308838, 'rewards/rejected': -0.6171549558639526, 'rewards/accuracies': 0.7250000238418579, 'rewards/margins': 0.23094138503074646, 'logps/chosen': -181.45826721191406, 'logps/rejected': -256.80096435546875, 'logps/ref_chosen': -75.5998764038086, 'logps/ref_rejected': -86.76122283935547, 'logits/chosen': -0.7806903719902039, 'logits/rejected': -0.7004286050796509, 'kl/p_epsilon_steps': 0.6875, 'kl/n_epsilon_steps': 0.3125, 'kl/beta': 0.003651682287454605, 'kl/avg_steps': 0.375, 'epoch': 0.9}
90%|█████████ | 306/340 [18:59<02:05, 3.70s/it]
90%|█████████ | 307/340 [19:02<01:52, 3.40s/it]
91%|█████████ | 308/340 [19:04<01:42, 3.21s/it]
91%|█████████ | 309/340 [19:07<01:35, 3.07s/it]
91%|█████████ | 310/340 [19:10<01:29, 2.99s/it]
{'loss': 0.6111, 'grad_norm': 5.218584060668945, 'learning_rate': 1.2555131639630567e-08, 'rewards/chosen': -0.4038282036781311, 'rewards/rejected': -0.6236552000045776, 'rewards/accuracies': 0.7265625, 'rewards/margins': 0.21982701122760773, 'logps/chosen': -191.44869995117188, 'logps/rejected': -258.40545654296875, 'logps/ref_chosen': -78.4868392944336, 'logps/ref_rejected': -83.08047485351562, 'logits/chosen': -0.7832438349723816, 'logits/rejected': -0.6719276309013367, 'kl/p_epsilon_steps': 0.698437511920929, 'kl/n_epsilon_steps': 0.30156248807907104, 'kl/beta': 0.0035780933685600758, 'kl/avg_steps': 0.3968749940395355, 'epoch': 0.91}
91%|█████████▏| 311/340 [19:13<01:24, 2.91s/it]
92%|█████████▏| 312/340 [19:15<01:19, 2.84s/it]
92%|█████████▏| 313/340 [19:18<01:15, 2.80s/it]
92%|█████████▏| 314/340 [19:21<01:12, 2.78s/it]
93%|█████████▎| 315/340 [19:23<01:08, 2.76s/it]
{'loss': 0.6153, 'grad_norm': 6.10360860824585, 'learning_rate': 8.85387393063622e-09, 'rewards/chosen': -0.4042418897151947, 'rewards/rejected': -0.6095074415206909, 'rewards/accuracies': 0.699999988079071, 'rewards/margins': 0.20526555180549622, 'logps/chosen': -194.56436157226562, 'logps/rejected': -261.40032958984375, 'logps/ref_chosen': -79.54651641845703, 'logps/ref_rejected': -87.11808776855469, 'logits/chosen': -0.8057095408439636, 'logits/rejected': -0.7011617422103882, 'kl/p_epsilon_steps': 0.668749988079071, 'kl/n_epsilon_steps': 0.33125001192092896, 'kl/beta': 0.0035165518056601286, 'kl/avg_steps': 0.3375000059604645, 'epoch': 0.93}
93%|█████████▎| 316/340 [19:26<01:05, 2.74s/it]
93%|█████████▎| 317/340 [19:29<01:02, 2.73s/it]
94%|█████████▎| 318/340 [19:32<00:59, 2.73s/it]
94%|█████████▍| 319/340 [19:34<00:57, 2.73s/it]
94%|█████████▍| 320/340 [19:37<00:54, 2.72s/it]
{'loss': 0.6302, 'grad_norm': 5.0830488204956055, 'learning_rate': 5.7879205600998296e-09, 'rewards/chosen': -0.39771518111228943, 'rewards/rejected': -0.5684808492660522, 'rewards/accuracies': 0.668749988079071, 'rewards/margins': 0.17076563835144043, 'logps/chosen': -193.45582580566406, 'logps/rejected': -248.977783203125, 'logps/ref_chosen': -78.56401062011719, 'logps/ref_rejected': -83.85292053222656, 'logits/chosen': -0.8048986196517944, 'logits/rejected': -0.6852750778198242, 'kl/p_epsilon_steps': 0.6421874761581421, 'kl/n_epsilon_steps': 0.3578124940395355, 'kl/beta': 0.0034615718759596348, 'kl/avg_steps': 0.28437501192092896, 'epoch': 0.94}
94%|█████████▍| 321/340 [19:40<00:51, 2.72s/it]
95%|█████████▍| 322/340 [19:42<00:49, 2.72s/it]
95%|█████████▌| 323/340 [19:45<00:46, 2.73s/it]
95%|█████████▌| 324/340 [19:48<00:44, 2.77s/it]
96%|█████████▌| 325/340 [19:51<00:41, 2.75s/it]
{'loss': 0.6219, 'grad_norm': 5.110870361328125, 'learning_rate': 3.3653488440851253e-09, 'rewards/chosen': -0.3717408776283264, 'rewards/rejected': -0.5669502019882202, 'rewards/accuracies': 0.692187488079071, 'rewards/margins': 0.195209339261055, 'logps/chosen': -183.75088500976562, 'logps/rejected': -254.2379150390625, 'logps/ref_chosen': -74.60850524902344, 'logps/ref_rejected': -86.81698608398438, 'logits/chosen': -0.7829563021659851, 'logits/rejected': -0.7219451665878296, 'kl/p_epsilon_steps': 0.675000011920929, 'kl/n_epsilon_steps': 0.32499998807907104, 'kl/beta': 0.0034066252410411835, 'kl/avg_steps': 0.3499999940395355, 'epoch': 0.96}
96%|█████████▌| 326/340 [19:53<00:38, 2.72s/it]
96%|█████████▌| 327/340 [19:56<00:34, 2.67s/it]
96%|█████████▋| 328/340 [19:59<00:32, 2.68s/it]
97%|█████████▋| 329/340 [20:01<00:29, 2.65s/it]
97%|█████████▋| 330/340 [20:04<00:26, 2.68s/it]
{'loss': 0.601, 'grad_norm': 4.562494277954102, 'learning_rate': 1.592541096695571e-09, 'rewards/chosen': -0.34669384360313416, 'rewards/rejected': -0.5768105387687683, 'rewards/accuracies': 0.7578125, 'rewards/margins': 0.23011669516563416, 'logps/chosen': -178.63034057617188, 'logps/rejected': -266.2847595214844, 'logps/ref_chosen': -74.63096618652344, 'logps/ref_rejected': -92.50404357910156, 'logits/chosen': -0.7936745882034302, 'logits/rejected': -0.739700436592102, 'kl/p_epsilon_steps': 0.7359374761581421, 'kl/n_epsilon_steps': 0.26249998807907104, 'kl/beta': 0.003342044074088335, 'kl/avg_steps': 0.47343748807907104, 'epoch': 0.97}
97%|█████████▋| 331/340 [20:07<00:24, 2.68s/it]
98%|█████████▊| 332/340 [20:09<00:21, 2.70s/it]
98%|█████████▊| 333/340 [20:12<00:18, 2.71s/it]
98%|█████████▊| 334/340 [20:15<00:16, 2.71s/it]
99%|█████████▊| 335/340 [20:18<00:13, 2.71s/it]
{'loss': 0.6167, 'grad_norm': 4.651317596435547, 'learning_rate': 4.741678157389739e-10, 'rewards/chosen': -0.3669508695602417, 'rewards/rejected': -0.5604615211486816, 'rewards/accuracies': 0.7203124761581421, 'rewards/margins': 0.19351065158843994, 'logps/chosen': -193.51834106445312, 'logps/rejected': -261.07110595703125, 'logps/ref_chosen': -81.25680541992188, 'logps/ref_rejected': -88.71739196777344, 'logits/chosen': -0.8402039408683777, 'logits/rejected': -0.7369452118873596, 'kl/p_epsilon_steps': 0.6781250238418579, 'kl/n_epsilon_steps': 0.3218750059604645, 'kl/beta': 0.003271129447966814, 'kl/avg_steps': 0.35624998807907104, 'epoch': 0.99}
99%|█████████▉| 336/340 [20:20<00:10, 2.67s/it]
99%|█████████▉| 337/340 [20:23<00:08, 2.71s/it]
99%|█████████▉| 338/340 [20:26<00:05, 2.70s/it]
100%|█████████▉| 339/340 [20:28<00:02, 2.61s/it]
100%|██████████| 340/340 [20:31<00:00, 2.64s/it]
{'loss': 0.612, 'grad_norm': 4.5893425941467285, 'learning_rate': 1.31753782067201e-11, 'rewards/chosen': -0.36068642139434814, 'rewards/rejected': -0.5670695900917053, 'rewards/accuracies': 0.721875011920929, 'rewards/margins': 0.20638315379619598, 'logps/chosen': -185.0140838623047, 'logps/rejected': -256.5284423828125, 'logps/ref_chosen': -72.54796600341797, 'logps/ref_rejected': -78.83277893066406, 'logits/chosen': -0.7557514905929565, 'logits/rejected': -0.6398700475692749, 'kl/p_epsilon_steps': 0.6890624761581421, 'kl/n_epsilon_steps': 0.3109374940395355, 'kl/beta': 0.003211395815014839, 'kl/avg_steps': 0.37812501192092896, 'epoch': 1.0}
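The logged rewards can be recovered, approximately, from the logged sequence log-probs via the DPO implicit reward r = beta * (log pi - log pi_ref), using the dynamic beta reported as kl/beta. A rough consistency check on the step-340 numbers above, assuming that identity holds here (the small gaps are plausibly batch averaging and bf16 rounding):

beta = 0.003211395815014839  # kl/beta at step 340
logp_chosen, ref_chosen = -185.0140838623047, -72.54796600341797
logp_rejected, ref_rejected = -256.5284423828125, -78.83277893066406

r_chosen = beta * (logp_chosen - ref_chosen)        # ~ -0.3612 vs logged -0.3607
r_rejected = beta * (logp_rejected - ref_rejected)  # ~ -0.5706 vs logged -0.5671
print(r_chosen, r_rejected, r_chosen - r_rejected)  # margin ~0.2095 vs logged 0.2064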
[INFO|trainer.py:3984] 2026-04-11 00:04:39,871 >> Saving model checkpoint to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-340
[INFO|configuration_utils.py:419] 2026-04-11 00:04:39,876 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-340/config.json
[INFO|configuration_utils.py:911] 2026-04-11 00:04:39,879 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-340/generation_config.json
[INFO|modeling_utils.py:3580] 2026-04-11 00:05:19,147 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 6 checkpoint shards. You can find where each parameter has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-340/model.safetensors.index.json.
[INFO|tokenization_utils_base.py:2510] 2026-04-11 00:05:19,153 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-340/tokenizer_config.json
[INFO|tokenization_utils_base.py:2519] 2026-04-11 00:05:19,156 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/checkpoint-340/special_tokens_map.json
[INFO|trainer.py:2681] 2026-04-11 00:08:37,503 >>
Training completed. Do not forget to share your model on huggingface.co/models =)
{'train_runtime': 1489.7896, 'train_samples_per_second': 29.265, 'train_steps_per_second': 0.228, 'train_loss': 0.6232832217917723, 'epoch': 1.0}
100%|██████████| 340/340 [24:43<00:00, 4.36s/it]
***** train metrics *****
epoch = 1.0
total_flos = 0GF
train_loss = 0.6233
train_runtime = 0:24:49.78
train_samples = 43598
train_samples_per_second = 29.265
train_steps_per_second = 0.228
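The throughput figures follow directly from the raw counters; plain arithmetic on the logged values (the implied global batch size is an inference, not something the log states):

train_runtime_s = 1489.7896
train_samples = 43598
train_steps = 340

print(train_samples / train_runtime_s)  # 29.26 -> train_samples_per_second = 29.265
print(train_steps / train_runtime_s)    # 0.228 -> train_steps_per_second = 0.228
print(train_samples / train_steps)      # ~128.2 -> implies a global batch of ~128
                                        # (e.g. 8 GPUs x 16 per device, with drop_last)
# 1489.79 s = 24 min 49.79 s, matching train_runtime = 0:24:49.78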
2026-04-11 00:08:37 - INFO - __main__ - *** Training complete ***
2026-04-11 00:08:37 - INFO - __main__ - *** Save model ***
[INFO|configuration_utils.py:419] 2026-04-11 00:08:55,316 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/config.json
[INFO|configuration_utils.py:911] 2026-04-11 00:08:55,320 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/generation_config.json
[INFO|modeling_utils.py:3580] 2026-04-11 00:09:43,818 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 7 checkpoint shards. You can find where each parameter has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/model.safetensors.index.json.
[INFO|tokenization_utils_base.py:2510] 2026-04-11 00:09:43,823 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/tokenizer_config.json
[INFO|tokenization_utils_base.py:2519] 2026-04-11 00:09:43,826 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/special_tokens_map.json
2026-04-11 00:09:43 - INFO - __main__ - Saved HF-compatible model artifacts to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108
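The saved directory is a standard sharded safetensors checkpoint, so it loads back with plain transformers; a minimal sketch (the dtype and attention implementation mirror this run's config and are optional at load time):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

out_dir = "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108"

# from_pretrained reads model.safetensors.index.json and assembles all shards.
tokenizer = AutoTokenizer.from_pretrained(out_dir)
model = AutoModelForCausalLM.from_pretrained(
    out_dir,
    torch_dtype=torch.bfloat16,
    attn_implementation="flash_attention_2",
)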
[INFO|modelcard.py:450] 2026-04-11 00:09:44,248 >> Dropping the following result as it does not have all the necessary fields:
{'dataset': {'name': 'Anthropic/hh-rlhf', 'type': 'Anthropic/hh-rlhf'}}
[INFO|configuration_utils.py:419] 2026-04-11 00:09:44,261 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108/config.json
2026-04-11 00:09:44 - INFO - __main__ - *** Evaluate ***
[INFO|trainer.py:4307] 2026-04-11 00:09:44,262 >>
***** Running Evaluation *****
[INFO|trainer.py:4309] 2026-04-11 00:09:44,262 >> Num examples = 2339
[INFO|trainer.py:4312] 2026-04-11 00:09:44,262 >> Batch size = 16
0%| | 0/18 [00:00<?, ?it/s]
11%|█ | 2/18 [00:01<00:09, 1.62it/s]
17%|█▋ | 3/18 [00:02<00:12, 1.17it/s]
22%|██▏ | 4/18 [00:03<00:13, 1.00it/s]
28%|██▊ | 5/18 [00:04<00:14, 1.08s/it]
33%|███▎ | 6/18 [00:06<00:13, 1.14s/it]
39%|███▉ | 7/18 [00:07<00:12, 1.18s/it]
44%|████▍ | 8/18 [00:08<00:11, 1.20s/it]
50%|█████ | 9/18 [00:09<00:10, 1.20s/it]
56%|█████▌ | 10/18 [00:11<00:09, 1.22s/it]
61%|██████ | 11/18 [00:12<00:08, 1.21s/it]
67%|██████▋ | 12/18 [00:13<00:07, 1.23s/it]
72%|███████▏ | 13/18 [00:14<00:06, 1.23s/it]
78%|███████▊ | 14/18 [00:16<00:04, 1.24s/it]
83%|████████▎ | 15/18 [00:17<00:03, 1.25s/it]
89%|████████▉ | 16/18 [00:18<00:02, 1.25s/it]
94%|█████████▍| 17/18 [00:19<00:01, 1.24s/it]
100%|██████████| 18/18 [00:21<00:00, 1.17s/it]
***** eval metrics *****
epoch = 1.0
eval_kl/n_epsilon_steps = 0.3837
eval_kl/p_epsilon_steps = 0.6159
eval_logits/chosen = -0.8295
eval_logits/rejected = -0.7225
eval_logps/chosen = -208.4018
eval_logps/ref_chosen = -87.8236
eval_logps/ref_rejected = -82.8189
eval_logps/rejected = -244.0941
eval_loss = 0.6479
eval_rewards/accuracies = 0.6415
eval_rewards/chosen = -0.3831
eval_rewards/margins = 0.1264
eval_rewards/rejected = -0.5095
eval_runtime = 0:00:22.28
eval_samples = 2339
eval_samples_per_second = 104.948
eval_steps_per_second = 0.853
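For reference, the pairwise metrics above are conventionally derived from per-pair implicit rewards. A minimal sketch of the usual definitions in DPO-style trainers (the input tensors are placeholders, not values from this run):

import torch

def pairwise_eval_metrics(r_chosen: torch.Tensor, r_rejected: torch.Tensor) -> dict:
    # One entry per preference pair; metrics are batch means.
    margins = r_chosen - r_rejected
    return {
        "rewards/accuracies": (margins > 0).float().mean().item(),  # cf. 0.6415 above
        "rewards/margins": margins.mean().item(),                   # cf. 0.1264 above
        "rewards/chosen": r_chosen.mean().item(),
        "rewards/rejected": r_rejected.mean().item(),
    }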
2026-04-11 00:10:06 - INFO - __main__ - *** Training complete! ***
wandb: 0.080 MB of 0.080 MB uploaded
wandb:
wandb: Run history:
wandb: eval/kl/n_epsilon_steps █▃▁▁
wandb: eval/kl/p_epsilon_steps ▁▆██
wandb: eval/logits/chosen ▁▅██
wandb: eval/logits/rejected ▁▅██
wandb: eval/logps/chosen █▃▁▁
wandb: eval/logps/ref_chosen ▁▁▁▁
wandb: eval/logps/ref_rejected ▁▁▁▁
wandb: eval/logps/rejected █▃▁▁
wandb: eval/loss █▁▁▃
wandb: eval/rewards/accuracies ▁▆██
wandb: eval/rewards/chosen █▁▃▅
wandb: eval/rewards/margins ▁█▆▅
wandb: eval/rewards/rejected █▁▃▅
wandb: eval/runtime █▃▆▁
wandb: eval/samples_per_second ▁▆▃█
wandb: eval/steps_per_second ▁▆▂█
wandb: train/epoch ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
wandb: train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
wandb: train/grad_norm ▁▁▂▁▁▁▃▃▃▃▃▄▄▅▅▅▆▆▅▆▆▅▇▆▆▆▆▆█▆▆▆▅▅▅▆▅▅▄▄
wandb: train/kl/avg_steps ▁▁▃██▆▄▃▄▃▄▄▆▅▄▄▅▆▅▅▅▅▅▅▅▆▇▇▅▅▆▆▆▆▆▆▆▅▇▆
wandb: train/kl/beta ████▇▇▇▆▆▆▆▆▆▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁▁
wandb: train/kl/n_epsilon_steps ██▆▁▁▃▅▆▅▆▅▅▃▄▅▅▄▃▄▄▄▄▄▄▄▃▃▂▄▄▃▃▃▃▃▃▃▄▂▃
wandb: train/kl/p_epsilon_steps ▁▁▃██▆▄▃▄▃▄▄▆▅▄▄▅▆▅▅▅▅▅▅▅▆▇▇▅▅▆▆▆▆▆▆▆▅▇▆
wandb: train/learning_rate ▁▂▄▆▇██████▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
wandb: train/logits/chosen ███▇▇▅▃▂▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▅▅▆
wandb: train/logits/rejected ██▇▇▇▅▃▂▁▁▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▅▅▅▆
wandb: train/logps/chosen ██████▇▆▇▆▇▆▆▅▅▅▄▄▄▄▄▄▃▃▃▂▃▂▂▂▁▂▂▂▂▂▁▁▂▁
wandb: train/logps/ref_chosen █▄▇▅▄▅▅▁▅▃▇▆▆▃▆▅▁▃▃▂▅▅▅▃▇▂▃▃▅▃▂▄▃▃▆▄▃▃▅▆
wandb: train/logps/ref_rejected █▄▃▂▄▃▅▅▂▄▅▆▄▄▃▄▂▃▄▄▃▅▂▃▅▄▂▂▄▅▃▅▂▃▁▃▄▄▁▅
wandb: train/logps/rejected ██▇▇█▇▇▇▆▆▆▆▆▅▅▅▄▄▄▄▃▄▃▃▃▂▂▂▂▂▂▂▁▂▁▁▁▂▁▁
wandb: train/loss █████▇▇▆▅▅▅▅▄▄▃▃▃▂▃▃▂▃▂▂▂▂▁▁▂▃▂▂▂▂▂▂▃▄▂▃
wandb: train/rewards/accuracies ▁▁▃▇█▇▅▅▅▅▅▅▇▅▅▅▅▆▆▆▆▅▆▆▅▆▇▇▆▆▆▆▆▆▆▆▆▅▇▆
wandb: train/rewards/chosen ██████▇▆▆▅▅▄▄▄▃▂▂▃▂▂▂▂▂▂▂▁▂▁▁▁▁▂▂▂▂▂▂▂▃▃
wandb: train/rewards/margins ▁▁▁▁▁▂▂▂▃▃▄▄▅▅▅▆▆▇▆▆▇▆▇▇▇▇██▇▆▇▇▇▆▇▆▆▅▆▆
wandb: train/rewards/rejected █████▇▇▆▆▅▅▅▄▄▃▃▃▂▂▂▂▂▂▁▂▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃
wandb:
wandb: Run summary:
wandb: eval/kl/n_epsilon_steps 0.38368
wandb: eval/kl/p_epsilon_steps 0.61589
wandb: eval/logits/chosen -0.82952
wandb: eval/logits/rejected -0.72247
wandb: eval/logps/chosen -208.40182
wandb: eval/logps/ref_chosen -87.82356
wandb: eval/logps/ref_rejected -82.81888
wandb: eval/logps/rejected -244.09407
wandb: eval/loss 0.64793
wandb: eval/rewards/accuracies 0.64149
wandb: eval/rewards/chosen -0.38306
wandb: eval/rewards/margins 0.12642
wandb: eval/rewards/rejected -0.50948
wandb: eval/runtime 22.2873
wandb: eval/samples_per_second 104.948
wandb: eval/steps_per_second 0.853
wandb: total_flos 0.0
wandb: train/epoch 1.0
wandb: train/global_step 340
wandb: train/grad_norm 4.58934
wandb: train/kl/avg_steps 0.37813
wandb: train/kl/beta 0.00321
wandb: train/kl/n_epsilon_steps 0.31094
wandb: train/kl/p_epsilon_steps 0.68906
wandb: train/learning_rate 0.0
wandb: train/logits/chosen -0.75575
wandb: train/logits/rejected -0.63987
wandb: train/logps/chosen -185.01408
wandb: train/logps/ref_chosen -72.54797
wandb: train/logps/ref_rejected -78.83278
wandb: train/logps/rejected -256.52844
wandb: train/loss 0.612
wandb: train/rewards/accuracies 0.72188
wandb: train/rewards/chosen -0.36069
wandb: train/rewards/margins 0.20638
wandb: train/rewards/rejected -0.56707
wandb: train_loss 0.62328
wandb: train_runtime 1489.7896
wandb: train_samples_per_second 29.265
wandb: train_steps_per_second 0.228
wandb:
wandb: 🚀 View run llama-3-8b-base-epsilon-dpo-hh-helpful-8xh200-20260410-233108 at: https://wandb.ai/can-not-fand-northeastern-university/huggingface/runs/4j5nnm1b
wandb: ⭐️ View project at: https://wandb.ai/can-not-fand-northeastern-university/huggingface
wandb: Synced 6 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)
wandb: Find logs at: /scratch/feng.yulu/dynamic-dpo-v4/wandb/wandb/run-20260410_234350-4j5nnm1b/logs
wandb: WARNING The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require("core")`! See https://wandb.me/wandb-core for more information.
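The synced run can be pulled back out of W&B with the public API; a minimal sketch using the entity/project/run_id from the messages above:

import wandb

api = wandb.Api()
run = api.run("can-not-fand-northeastern-university/huggingface/4j5nnm1b")
print(run.summary.get("train_loss"))  # 0.62328
history = run.history(keys=["train/loss", "train/rewards/margins"])  # pandas DataFrame
print(history.tail())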