1266 lines
253 KiB
Plaintext
1266 lines
253 KiB
Plaintext
[W CUDAAllocatorConfig.h:28] Warning: expandable_segments not supported on this platform (function operator())
|
||
[W CUDAAllocatorConfig.h:28] Warning: expandable_segments not supported on this platform (function operator())
|
||
[W CUDAAllocatorConfig.h:28] Warning: expandable_segments not supported on this platform (function operator())
|
||
[W CUDAAllocatorConfig.h:28] Warning: expandable_segments not supported on this platform (function operator())
|
||
[W CUDAAllocatorConfig.h:28] Warning: expandable_segments not supported on this platform (function operator())
|
||
[W CUDAAllocatorConfig.h:28] Warning: expandable_segments not supported on this platform (function operator())
|
||
[W CUDAAllocatorConfig.h:28] Warning: expandable_segments not supported on this platform (function operator())
|
||
[W CUDAAllocatorConfig.h:28] Warning: expandable_segments not supported on this platform (function operator())
|
||
2026-04-10 17:20:29 - INFO - __main__ - Model parameters ModelArguments(base_model_revision=None, model_name_or_path='/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758', model_revision='main', model_code_revision=None, torch_dtype='bfloat16', tokenizer_name_or_path=None, trust_remote_code=False, attn_implementation='flash_attention_2', use_peft=False, lora_r=16, lora_alpha=32, lora_dropout=0.05, lora_target_modules=None, lora_modules_to_save=None, load_in_8bit=False, load_in_4bit=False, bnb_4bit_quant_type='nf4', use_bnb_nested_quant=False, bnb_4bit_quant_storage='uint8')
|
||
2026-04-10 17:20:29 - INFO - __main__ - Data parameters DataArguments(chat_template=None, dataset_mixer={'Anthropic/hh-rlhf': 1.0}, text_column='text', dataset_splits=['train', 'test'], dataset_configs=['helpful-base'], dataset_dir=None, preprocessing_num_workers=12, use_persistent_hf_cache=True, hf_cache_dir='/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets', truncation_side=None, auto_insert_empty_system_msg=True, preprocessing_log_samples=0, preprocessing_log_dir=None)
|
||
2026-04-10 17:20:29 - INFO - __main__ - Training/evaluation parameters MarginDPOConfig(
|
||
_n_gpu=1,
|
||
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
|
||
adafactor=False,
|
||
adam_beta1=0.9,
|
||
adam_beta2=0.999,
|
||
adam_epsilon=1e-08,
|
||
auto_find_batch_size=False,
|
||
average_tokens_across_devices=False,
|
||
batch_eval_metrics=False,
|
||
beta=0.1,
|
||
bf16=True,
|
||
bf16_full_eval=False,
|
||
data_seed=None,
|
||
dataloader_drop_last=True,
|
||
dataloader_num_workers=0,
|
||
dataloader_persistent_workers=False,
|
||
dataloader_pin_memory=True,
|
||
dataloader_prefetch_factor=None,
|
||
dataset_num_proc=12,
|
||
ddp_backend=None,
|
||
ddp_broadcast_buffers=None,
|
||
ddp_bucket_cap_mb=None,
|
||
ddp_find_unused_parameters=None,
|
||
ddp_timeout=1800,
|
||
debug=[],
|
||
deepspeed=None,
|
||
disable_dropout=True,
|
||
disable_tqdm=False,
|
||
do_eval=True,
|
||
do_predict=False,
|
||
do_train=False,
|
||
eval_accumulation_steps=None,
|
||
eval_delay=0,
|
||
eval_do_concat_batches=True,
|
||
eval_on_start=False,
|
||
eval_steps=100,
|
||
eval_strategy=IntervalStrategy.STEPS,
|
||
eval_use_gather_object=False,
|
||
f_alpha_divergence_coef=1.0,
|
||
f_divergence_type=reverse_kl,
|
||
force_use_ref_model=False,
|
||
fp16=False,
|
||
fp16_backend=auto,
|
||
fp16_full_eval=False,
|
||
fp16_opt_level=O1,
|
||
fsdp=[],
|
||
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
|
||
fsdp_min_num_params=0,
|
||
fsdp_transformer_layer_cls_to_wrap=None,
|
||
full_determinism=False,
|
||
generate_during_eval=False,
|
||
gradient_accumulation_steps=1,
|
||
gradient_checkpointing=True,
|
||
gradient_checkpointing_kwargs={'use_reentrant': False},
|
||
greater_is_better=None,
|
||
group_by_length=False,
|
||
half_precision_backend=auto,
|
||
hub_always_push=False,
|
||
hub_margin_dataset_id=W-61/llama-3-8b-base-margin-dpo-hh-helpful-margin-log,
|
||
hub_model_id=W-61/llama-3-8b-base-margin-dpo-hh-helpful,
|
||
hub_model_revision=main,
|
||
hub_private_repo=None,
|
||
hub_strategy=HubStrategy.EVERY_SAVE,
|
||
hub_token=<HUB_TOKEN>,
|
||
ignore_data_skip=False,
|
||
include_for_metrics=[],
|
||
include_inputs_for_metrics=False,
|
||
include_num_input_tokens_seen=False,
|
||
include_tokens_per_second=False,
|
||
is_encoder_decoder=None,
|
||
jit_mode_eval=False,
|
||
label_names=None,
|
||
label_pad_token_id=-100,
|
||
label_smoothing=0.0,
|
||
label_smoothing_factor=0.0,
|
||
learning_rate=5e-07,
|
||
length_column_name=length,
|
||
load_best_model_at_end=False,
|
||
local_rank=0,
|
||
log_level=info,
|
||
log_level_replica=warning,
|
||
log_on_each_node=True,
|
||
logging_dir=outputs/llama-3-8b-base-margin-dpo-hh-helpful/runs/Apr10_17-20-28_d4054,
|
||
logging_first_step=True,
|
||
logging_nan_inf_filter=True,
|
||
logging_steps=5,
|
||
logging_strategy=IntervalStrategy.STEPS,
|
||
loss_type=sigmoid,
|
||
lr_scheduler_kwargs={},
|
||
lr_scheduler_type=SchedulerType.COSINE,
|
||
margin_dataset_private=None,
|
||
margin_dataset_split=train,
|
||
margin_log_path=/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/margin_logs,
|
||
margin_log_steps=1,
|
||
margin_save_full=True,
|
||
max_grad_norm=1.0,
|
||
max_length=512,
|
||
max_prompt_length=256,
|
||
max_steps=-1,
|
||
max_target_length=None,
|
||
metric_for_best_model=None,
|
||
model_adapter_name=None,
|
||
model_init_kwargs=None,
|
||
mp_parameters=,
|
||
neftune_noise_alpha=None,
|
||
no_cuda=False,
|
||
non_finite_logits_handling=error,
|
||
num_train_epochs=1,
|
||
optim=OptimizerNames.ADAMW_TORCH,
|
||
optim_args=None,
|
||
optim_target_modules=None,
|
||
output_dir=/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009,
|
||
overwrite_output_dir=False,
|
||
padding_value=None,
|
||
past_index=-1,
|
||
per_device_eval_batch_size=16,
|
||
per_device_train_batch_size=16,
|
||
post_tokenization_log_dir=None,
|
||
post_tokenization_log_samples=0,
|
||
precompute_ref_batch_size=None,
|
||
precompute_ref_eval_batch_size=None,
|
||
precompute_ref_log_probs=False,
|
||
prediction_loss_only=False,
|
||
push_margin_dataset=False,
|
||
push_to_hub=False,
|
||
push_to_hub_model_id=None,
|
||
push_to_hub_organization=None,
|
||
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
|
||
ray_scope=last,
|
||
ref_adapter_name=None,
|
||
ref_model_init_kwargs=None,
|
||
ref_model_mixup_alpha=0.9,
|
||
ref_model_sync_steps=64,
|
||
reference_free=False,
|
||
remove_unused_columns=False,
|
||
report_to=['wandb'],
|
||
require_explicit_ref_model=True,
|
||
restore_callback_states_from_checkpoint=False,
|
||
resume_from_checkpoint=None,
|
||
reuse_tokenized_dataset=True,
|
||
rpo_alpha=None,
|
||
run_name=llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009,
|
||
save_on_each_node=False,
|
||
save_only_model=False,
|
||
save_safetensors=True,
|
||
save_steps=200,
|
||
save_strategy=SaveStrategy.STEPS,
|
||
save_total_limit=2,
|
||
seed=42,
|
||
sft_weight=0.0,
|
||
skip_memory_metrics=True,
|
||
sync_ref_model=False,
|
||
tf32=None,
|
||
tokenization_batch_size=128,
|
||
tokenization_mode=online,
|
||
tokenized_dataset_cache_dir=/scratch/feng.yulu/dynamic-dpo-v4/tokenized_preferences,
|
||
torch_compile=False,
|
||
torch_compile_backend=None,
|
||
torch_compile_mode=None,
|
||
torch_empty_cache_steps=None,
|
||
torchdynamo=None,
|
||
tp_size=0,
|
||
tpu_metrics_debug=False,
|
||
tpu_num_cores=None,
|
||
trainer_type=margin_dpo,
|
||
truncation_mode=keep_end,
|
||
use_cpu=False,
|
||
use_ipex=False,
|
||
use_legacy_prediction_loop=False,
|
||
use_liger_kernel=False,
|
||
use_mps_device=False,
|
||
warmup_ratio=0.1,
|
||
warmup_steps=0,
|
||
weight_decay=0.0,
|
||
)
|
||
2026-04-10 17:20:29 - INFO - __main__ - Margin-DPO parameters: beta=0.1, f_divergence_type=reverse_kl, margin_log_steps=1
|
||
2026-04-10 17:20:29 - INFO - __main__ - Using persistent HF datasets cache at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets
|
||
2026-04-10 17:20:32 - WARNING - __main__ - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
|
||
|
||
Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (train): 3%|▎ | 1097/43598 [00:00<00:03, 10895.92 examples/s]
|
||
Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (train): 5%|▌ | 2353/43598 [00:00<00:03, 11866.77 examples/s]
|
||
Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (train): 3%|▎ | 1169/43598 [00:00<00:03, 11635.94 examples/s]
|
||
Normalizing raw HH preferences (train): 8%|▊ | 3681/43598 [00:00<00:03, 12163.46 examples/s]
|
||
Normalizing raw HH preferences (train): 3%|▎ | 1142/43598 [00:00<00:03, 11368.22 examples/s]
|
||
Normalizing raw HH preferences (train): 3%|▎ | 1147/43598 [00:00<00:03, 11418.69 examples/s]
|
||
Normalizing raw HH preferences (train): 3%|▎ | 1183/43598 [00:00<00:03, 11769.88 examples/s]
|
||
Normalizing raw HH preferences (train): 3%|▎ | 1173/43598 [00:00<00:03, 11669.94 examples/s]
|
||
Normalizing raw HH preferences (train): 6%|▌ | 2453/43598 [00:00<00:03, 12336.30 examples/s]
|
||
Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (train): 11%|█▏ | 4914/43598 [00:00<00:03, 12223.15 examples/s]
|
||
Normalizing raw HH preferences (train): 6%|▌ | 2440/43598 [00:00<00:03, 12305.80 examples/s]
|
||
Normalizing raw HH preferences (train): 6%|▌ | 2448/43598 [00:00<00:03, 12351.05 examples/s]
|
||
Normalizing raw HH preferences (train): 6%|▌ | 2491/43598 [00:00<00:03, 12526.50 examples/s]
|
||
Normalizing raw HH preferences (train): 6%|▌ | 2476/43598 [00:00<00:03, 12463.84 examples/s]
|
||
Normalizing raw HH preferences (train): 9%|▊ | 3726/43598 [00:00<00:03, 12512.55 examples/s]
|
||
Normalizing raw HH preferences (train): 3%|▎ | 1154/43598 [00:00<00:03, 11484.21 examples/s]
|
||
Normalizing raw HH preferences (train): 9%|▊ | 3720/43598 [00:00<00:03, 12528.24 examples/s]
|
||
Normalizing raw HH preferences (train): 9%|▊ | 3742/43598 [00:00<00:03, 12617.11 examples/s]
|
||
Normalizing raw HH preferences (train): 9%|▊ | 3787/43598 [00:00<00:03, 12720.52 examples/s]
|
||
Normalizing raw HH preferences (train): 6%|▌ | 2445/43598 [00:00<00:03, 12316.85 examples/s]
|
||
Normalizing raw HH preferences (train): 15%|█▌ | 6667/43598 [00:00<00:03, 10902.26 examples/s]
|
||
Normalizing raw HH preferences (train): 11%|█▏ | 4978/43598 [00:00<00:03, 12542.73 examples/s]
|
||
Normalizing raw HH preferences (train): 10%|▉ | 4291/43598 [00:00<00:03, 11379.36 examples/s]
|
||
Normalizing raw HH preferences (train): 13%|█▎ | 5512/43598 [00:00<00:03, 12211.02 examples/s]
|
||
Normalizing raw HH preferences (train): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (train): 13%|█▎ | 5567/43598 [00:00<00:03, 12388.75 examples/s]
|
||
Normalizing raw HH preferences (train): 9%|▊ | 3728/43598 [00:00<00:03, 12545.74 examples/s]
|
||
Normalizing raw HH preferences (train): 18%|█▊ | 7911/43598 [00:00<00:03, 11334.14 examples/s]
|
||
Normalizing raw HH preferences (train): 13%|█▎ | 5712/43598 [00:00<00:03, 12618.63 examples/s]
|
||
Normalizing raw HH preferences (train): 13%|█▎ | 5577/43598 [00:00<00:03, 11866.82 examples/s]
|
||
Normalizing raw HH preferences (train): 16%|█▌ | 6770/43598 [00:00<00:02, 12326.95 examples/s]
|
||
Normalizing raw HH preferences (train): 2%|▏ | 1000/43598 [00:00<00:04, 9715.76 examples/s]
|
||
Normalizing raw HH preferences (train): 16%|█▌ | 6841/43598 [00:00<00:02, 12482.80 examples/s]
|
||
Normalizing raw HH preferences (train): 16%|█▌ | 6841/43598 [00:00<00:02, 12500.01 examples/s]
|
||
Normalizing raw HH preferences (train): 11%|█▏ | 4993/43598 [00:00<00:03, 12583.12 examples/s]
|
||
Normalizing raw HH preferences (train): 16%|█▌ | 6991/43598 [00:00<00:02, 12668.33 examples/s]
|
||
Normalizing raw HH preferences (train): 16%|█▌ | 6858/43598 [00:00<00:03, 12168.15 examples/s]
|
||
Normalizing raw HH preferences (train): 22%|██▏ | 9729/43598 [00:00<00:02, 11624.50 examples/s]
|
||
Normalizing raw HH preferences (train): 5%|▌ | 2250/43598 [00:00<00:03, 11329.05 examples/s]
|
||
Normalizing raw HH preferences (train): 20%|█▉ | 8687/43598 [00:00<00:02, 12367.17 examples/s]
|
||
Normalizing raw HH preferences (train): 20%|██ | 8731/43598 [00:00<00:02, 12535.96 examples/s]
|
||
Normalizing raw HH preferences (train): 20%|██ | 8720/43598 [00:00<00:02, 12494.56 examples/s]
|
||
Normalizing raw HH preferences (train): 16%|█▌ | 6851/43598 [00:00<00:02, 12488.03 examples/s]
|
||
Normalizing raw HH preferences (train): 25%|██▌ | 10985/43598 [00:00<00:02, 11869.28 examples/s]
|
||
Normalizing raw HH preferences (train): 8%|▊ | 3499/43598 [00:00<00:03, 11850.06 examples/s]
|
||
Normalizing raw HH preferences (train): 20%|██ | 8890/43598 [00:00<00:02, 12663.29 examples/s]
|
||
Normalizing raw HH preferences (train): 20%|██ | 8749/43598 [00:00<00:02, 12342.87 examples/s]
|
||
Normalizing raw HH preferences (train): 23%|██▎ | 9965/43598 [00:00<00:02, 12478.22 examples/s]
|
||
Normalizing raw HH preferences (train): 23%|██▎ | 10000/43598 [00:00<00:02, 12379.66 examples/s]
|
||
Normalizing raw HH preferences (train): 23%|██▎ | 10000/43598 [00:00<00:02, 12341.08 examples/s]
|
||
Normalizing raw HH preferences (train): 11%|█ | 4737/43598 [00:00<00:03, 12052.65 examples/s]
|
||
Normalizing raw HH preferences (train): 20%|██ | 8722/43598 [00:00<00:02, 12479.36 examples/s]
|
||
Normalizing raw HH preferences (train): 23%|██▎ | 10000/43598 [00:00<00:02, 12250.68 examples/s]
|
||
Normalizing raw HH preferences (train): 25%|██▍ | 10796/43598 [00:00<00:02, 12674.85 examples/s]
|
||
Normalizing raw HH preferences (train): 26%|██▌ | 11271/43598 [00:00<00:02, 12470.44 examples/s]
|
||
Normalizing raw HH preferences (train): 26%|██▌ | 11299/43598 [00:00<00:02, 12518.64 examples/s]
|
||
Normalizing raw HH preferences (train): 27%|██▋ | 11844/43598 [00:00<00:02, 12490.55 examples/s]
|
||
Normalizing raw HH preferences (train): 14%|█▎ | 5969/43598 [00:00<00:03, 12144.50 examples/s]
|
||
Normalizing raw HH preferences (train): 26%|██▌ | 11313/43598 [00:00<00:02, 12496.58 examples/s]
|
||
Normalizing raw HH preferences (train): 23%|██▎ | 10000/43598 [00:00<00:02, 12322.44 examples/s]
|
||
Normalizing raw HH preferences (train): 29%|██▉ | 12564/43598 [00:01<00:02, 12599.96 examples/s]
|
||
Normalizing raw HH preferences (train): 29%|██▉ | 12586/43598 [00:01<00:02, 12613.70 examples/s]
|
||
Normalizing raw HH preferences (train): 29%|██▉ | 12705/43598 [00:01<00:02, 12674.47 examples/s]
|
||
Normalizing raw HH preferences (train): 26%|██▌ | 11262/43598 [00:00<00:02, 12402.03 examples/s]
|
||
Normalizing raw HH preferences (train): 29%|██▉ | 12672/43598 [00:01<00:02, 12573.86 examples/s]
|
||
Normalizing raw HH preferences (train): 18%|█▊ | 7781/43598 [00:00<00:02, 12113.92 examples/s]
|
||
Normalizing raw HH preferences (train): 29%|██▉ | 12719/43598 [00:01<00:04, 7313.20 examples/s]
|
||
Normalizing raw HH preferences (train): 29%|██▉ | 12537/43598 [00:01<00:02, 12497.55 examples/s]
|
||
Normalizing raw HH preferences (train): 21%|██ | 9000/43598 [00:00<00:02, 11916.94 examples/s]
|
||
Normalizing raw HH preferences (train): 30%|███ | 13259/43598 [00:01<00:03, 8760.82 examples/s]
|
||
Normalizing raw HH preferences (train): 32%|███▏ | 13922/43598 [00:01<00:03, 8143.74 examples/s]
|
||
Normalizing raw HH preferences (train): 32%|███▏ | 13890/43598 [00:01<00:03, 8475.56 examples/s]
|
||
Normalizing raw HH preferences (train): 32%|███▏ | 13962/43598 [00:01<00:03, 8493.78 examples/s]
|
||
Normalizing raw HH preferences (train): 24%|██▎ | 10250/43598 [00:00<00:02, 12085.19 examples/s]
|
||
Normalizing raw HH preferences (train): 33%|███▎ | 14515/43598 [00:01<00:03, 9528.00 examples/s]
|
||
Normalizing raw HH preferences (train): 35%|███▍ | 15049/43598 [00:01<00:03, 8771.22 examples/s]
|
||
Normalizing raw HH preferences (train): 33%|███▎ | 14551/43598 [00:01<00:03, 9327.39 examples/s]
|
||
Normalizing raw HH preferences (train): 32%|███▏ | 13966/43598 [00:01<00:03, 8508.45 examples/s]
|
||
Normalizing raw HH preferences (train): 35%|███▍ | 15043/43598 [00:01<00:03, 9131.74 examples/s]
|
||
Normalizing raw HH preferences (train): 35%|███▍ | 15111/43598 [00:01<00:03, 9132.30 examples/s]
|
||
Normalizing raw HH preferences (train): 26%|██▋ | 11490/43598 [00:00<00:02, 12174.57 examples/s]
|
||
Normalizing raw HH preferences (train): 36%|███▌ | 15797/43598 [00:01<00:02, 10261.98 examples/s]
|
||
Normalizing raw HH preferences (train): 37%|███▋ | 16291/43598 [00:01<00:02, 9583.12 examples/s]
|
||
Normalizing raw HH preferences (train): 32%|███▏ | 13951/43598 [00:01<00:03, 8806.92 examples/s]
|
||
Normalizing raw HH preferences (train): 36%|███▋ | 15830/43598 [00:01<00:02, 9980.26 examples/s]
|
||
Normalizing raw HH preferences (train): 35%|███▍ | 15158/43598 [00:01<00:03, 9245.09 examples/s]
|
||
Normalizing raw HH preferences (train): 37%|███▋ | 16334/43598 [00:01<00:02, 10017.84 examples/s]
|
||
Normalizing raw HH preferences (train): 38%|███▊ | 16406/43598 [00:01<00:02, 10021.31 examples/s]
|
||
Normalizing raw HH preferences (train): 39%|███▉ | 17000/43598 [00:01<00:02, 10630.72 examples/s]
|
||
Normalizing raw HH preferences (train): 40%|████ | 17506/43598 [00:01<00:02, 10201.35 examples/s]
|
||
Normalizing raw HH preferences (train): 35%|███▍ | 15120/43598 [00:01<00:03, 9435.31 examples/s]
|
||
Normalizing raw HH preferences (train): 39%|███▉ | 17025/43598 [00:01<00:02, 10395.55 examples/s]
|
||
Normalizing raw HH preferences (train): 38%|███▊ | 16459/43598 [00:01<00:02, 10130.69 examples/s]
|
||
Normalizing raw HH preferences (train): 41%|████ | 17688/43598 [00:01<00:02, 10749.93 examples/s]
|
||
Normalizing raw HH preferences (train): 41%|████ | 17695/43598 [00:01<00:02, 10729.38 examples/s]
|
||
Normalizing raw HH preferences (train): 42%|████▏ | 18250/43598 [00:01<00:02, 11107.51 examples/s]
|
||
Normalizing raw HH preferences (train): 43%|████▎ | 18726/43598 [00:01<00:02, 10711.89 examples/s]
|
||
Normalizing raw HH preferences (train): 38%|███▊ | 16406/43598 [00:01<00:02, 10248.95 examples/s]
|
||
Normalizing raw HH preferences (train): 42%|████▏ | 18300/43598 [00:01<00:02, 10942.67 examples/s]
|
||
Normalizing raw HH preferences (train): 41%|████ | 17728/43598 [00:01<00:02, 10772.67 examples/s]
|
||
Normalizing raw HH preferences (train): 43%|████▎ | 18942/43598 [00:01<00:02, 11216.99 examples/s]
|
||
Normalizing raw HH preferences (train): 43%|████▎ | 18962/43598 [00:01<00:02, 11236.77 examples/s]
|
||
Normalizing raw HH preferences (train): 45%|████▍ | 19512/43598 [00:01<00:02, 11508.78 examples/s]
|
||
Normalizing raw HH preferences (train): 46%|████▌ | 19956/43598 [00:01<00:02, 11135.90 examples/s]
|
||
Normalizing raw HH preferences (train): 41%|████ | 17690/43598 [00:01<00:02, 10884.92 examples/s]
|
||
Normalizing raw HH preferences (train): 29%|██▉ | 12719/43598 [00:01<00:04, 6956.98 examples/s]
|
||
Normalizing raw HH preferences (train): 45%|████▍ | 19586/43598 [00:01<00:02, 11418.44 examples/s]
|
||
Normalizing raw HH preferences (train): 44%|████▎ | 18985/43598 [00:01<00:02, 11247.43 examples/s]
|
||
Normalizing raw HH preferences (train): 48%|████▊ | 20789/43598 [00:01<00:01, 11853.93 examples/s]
|
||
Normalizing raw HH preferences (train): 48%|████▊ | 20819/43598 [00:01<00:01, 11679.12 examples/s]
|
||
Normalizing raw HH preferences (train): 48%|████▊ | 20850/43598 [00:01<00:01, 11721.30 examples/s]
|
||
Normalizing raw HH preferences (train): 43%|████▎ | 18911/43598 [00:01<00:02, 11233.34 examples/s]
|
||
Normalizing raw HH preferences (train): 32%|███▏ | 13922/43598 [00:01<00:03, 7939.70 examples/s]
|
||
Normalizing raw HH preferences (train): 48%|████▊ | 20881/43598 [00:01<00:01, 11817.95 examples/s]
|
||
Normalizing raw HH preferences (train): 50%|████▉ | 21752/43598 [00:02<00:01, 11435.99 examples/s]
|
||
Normalizing raw HH preferences (train): 48%|████▊ | 20870/43598 [00:01<00:01, 11721.07 examples/s]
|
||
Normalizing raw HH preferences (train): 35%|███▍ | 15057/43598 [00:01<00:03, 8678.55 examples/s]
|
||
Normalizing raw HH preferences (train): 52%|█████▏ | 22690/43598 [00:01<00:01, 12039.71 examples/s]
|
||
Normalizing raw HH preferences (train): 53%|█████▎ | 22998/43598 [00:02<00:01, 11697.92 examples/s]
|
||
Normalizing raw HH preferences (train): 52%|█████▏ | 22724/43598 [00:01<00:01, 11978.66 examples/s]
|
||
Normalizing raw HH preferences (train): 52%|█████▏ | 22692/43598 [00:01<00:01, 11889.59 examples/s]
|
||
Normalizing raw HH preferences (train): 48%|████▊ | 20773/43598 [00:01<00:01, 11658.24 examples/s]
|
||
Normalizing raw HH preferences (train): 52%|█████▏ | 22779/43598 [00:01<00:01, 12114.22 examples/s]
|
||
Normalizing raw HH preferences (train): 37%|███▋ | 16305/43598 [00:01<00:02, 9576.12 examples/s]
|
||
Normalizing raw HH preferences (train): 52%|█████▏ | 22764/43598 [00:02<00:01, 12025.26 examples/s]
|
||
Normalizing raw HH preferences (train): 55%|█████▍ | 23967/43598 [00:02<00:01, 12226.74 examples/s]
|
||
Normalizing raw HH preferences (train): 55%|█████▌ | 23981/43598 [00:02<00:01, 12129.95 examples/s]
|
||
Normalizing raw HH preferences (train): 55%|█████▌ | 24000/43598 [00:02<00:01, 11979.32 examples/s]
|
||
Normalizing raw HH preferences (train): 50%|█████ | 22000/43598 [00:01<00:01, 11677.62 examples/s]
|
||
Normalizing raw HH preferences (train): 57%|█████▋ | 24813/43598 [00:02<00:01, 11837.60 examples/s]
|
||
Normalizing raw HH preferences (train): 40%|████ | 17519/43598 [00:01<00:02, 10221.14 examples/s]
|
||
Normalizing raw HH preferences (train): 57%|█████▋ | 24699/43598 [00:02<00:01, 12296.37 examples/s]
|
||
Normalizing raw HH preferences (train): 55%|█████▌ | 24000/43598 [00:02<00:01, 12015.52 examples/s]
|
||
Normalizing raw HH preferences (train): 58%|█████▊ | 25279/43598 [00:02<00:01, 12185.74 examples/s]
|
||
Normalizing raw HH preferences (train): 53%|█████▎ | 23276/43598 [00:02<00:01, 11963.12 examples/s]
|
||
Normalizing raw HH preferences (train): 59%|█████▉ | 25809/43598 [00:02<00:01, 12242.66 examples/s]
|
||
Normalizing raw HH preferences (train): 59%|█████▉ | 25839/43598 [00:02<00:01, 12214.59 examples/s]
|
||
Normalizing raw HH preferences (train): 43%|████▎ | 18745/43598 [00:01<00:02, 10759.48 examples/s]
|
||
Normalizing raw HH preferences (train): 60%|█████▉ | 25981/43598 [00:02<00:01, 12422.63 examples/s]
|
||
Normalizing raw HH preferences (train): 58%|█████▊ | 25284/43598 [00:02<00:01, 12226.22 examples/s]
|
||
Normalizing raw HH preferences (train): 61%|██████ | 26667/43598 [00:02<00:01, 11877.48 examples/s]
|
||
Normalizing raw HH preferences (train): 61%|██████ | 26542/43598 [00:02<00:01, 12303.38 examples/s]
|
||
Normalizing raw HH preferences (train): 56%|█████▋ | 24553/43598 [00:02<00:01, 12183.33 examples/s]
|
||
Normalizing raw HH preferences (train): 46%|████▌ | 19975/43598 [00:01<00:02, 11179.08 examples/s]
|
||
Normalizing raw HH preferences (train): 61%|██████ | 26552/43598 [00:02<00:01, 12344.08 examples/s]
|
||
Normalizing raw HH preferences (train): 63%|██████▎ | 27684/43598 [00:02<00:01, 12220.08 examples/s]
|
||
Normalizing raw HH preferences (train): 64%|██████▍ | 27887/43598 [00:02<00:01, 11954.46 examples/s]
|
||
Normalizing raw HH preferences (train): 64%|██████▎ | 27687/43598 [00:02<00:01, 12215.14 examples/s]
|
||
Normalizing raw HH preferences (train): 64%|██████▍ | 27815/43598 [00:02<00:01, 12417.48 examples/s]
|
||
Normalizing raw HH preferences (train): 64%|██████▍ | 27837/43598 [00:02<00:01, 12400.66 examples/s]
|
||
Normalizing raw HH preferences (train): 59%|█████▉ | 25824/43598 [00:02<00:01, 12330.32 examples/s]
|
||
Normalizing raw HH preferences (train): 64%|██████▍ | 27823/43598 [00:02<00:01, 12443.01 examples/s]
|
||
Normalizing raw HH preferences (train): 66%|██████▋ | 28949/43598 [00:02<00:01, 12321.94 examples/s]
|
||
Normalizing raw HH preferences (train): 66%|██████▋ | 28967/43598 [00:02<00:01, 12353.17 examples/s]
|
||
Normalizing raw HH preferences (train): 50%|████▉ | 21784/43598 [00:02<00:01, 11503.34 examples/s]
|
||
Normalizing raw HH preferences (train): 68%|██████▊ | 29729/43598 [00:02<00:01, 12063.72 examples/s]
|
||
Normalizing raw HH preferences (train): 68%|██████▊ | 29748/43598 [00:02<00:01, 12509.44 examples/s]
|
||
Normalizing raw HH preferences (train): 68%|██████▊ | 29711/43598 [00:02<00:01, 12365.46 examples/s]
|
||
Normalizing raw HH preferences (train): 64%|██████▎ | 27688/43598 [00:02<00:01, 12307.89 examples/s]
|
||
Normalizing raw HH preferences (train): 53%|█████▎ | 23000/43598 [00:02<00:01, 11508.78 examples/s]
|
||
Normalizing raw HH preferences (train): 68%|██████▊ | 29735/43598 [00:02<00:01, 12554.20 examples/s]
|
||
Normalizing raw HH preferences (train): 71%|███████ | 30832/43598 [00:02<00:01, 12397.64 examples/s]
|
||
Normalizing raw HH preferences (train): 71%|███████ | 30968/43598 [00:02<00:01, 12142.75 examples/s]
|
||
Normalizing raw HH preferences (train): 71%|███████ | 30854/43598 [00:02<00:01, 12423.76 examples/s]
|
||
Normalizing raw HH preferences (train): 71%|███████ | 30995/43598 [00:02<00:01, 12487.00 examples/s]
|
||
Normalizing raw HH preferences (train): 66%|██████▋ | 28967/43598 [00:02<00:01, 12432.67 examples/s]
|
||
Normalizing raw HH preferences (train): 56%|█████▌ | 24242/43598 [00:02<00:01, 11750.60 examples/s]
|
||
Normalizing raw HH preferences (train): 73%|███████▎ | 31700/43598 [00:02<00:00, 12546.70 examples/s]
|
||
Normalizing raw HH preferences (train): 71%|███████ | 31000/43598 [00:02<00:01, 12399.80 examples/s]
|
||
Normalizing raw HH preferences (train): 75%|███████▌ | 32699/43598 [00:02<00:00, 12397.90 examples/s]
|
||
Normalizing raw HH preferences (train): 75%|███████▌ | 32801/43598 [00:02<00:00, 12165.18 examples/s]
|
||
Normalizing raw HH preferences (train): 75%|███████▌ | 32723/43598 [00:02<00:00, 12432.29 examples/s]
|
||
Normalizing raw HH preferences (train): 75%|███████▌ | 32882/43598 [00:02<00:00, 12515.04 examples/s]
|
||
Normalizing raw HH preferences (train): 71%|███████ | 30845/43598 [00:02<00:01, 12461.63 examples/s]
|
||
Normalizing raw HH preferences (train): 58%|█████▊ | 25481/43598 [00:02<00:01, 11925.85 examples/s]
|
||
Normalizing raw HH preferences (train): 76%|███████▌ | 32992/43598 [00:02<00:00, 12632.58 examples/s]
|
||
Normalizing raw HH preferences (train): 74%|███████▍ | 32302/43598 [00:02<00:00, 12564.89 examples/s]
|
||
Normalizing raw HH preferences (train): 78%|███████▊ | 33948/43598 [00:02<00:00, 12416.93 examples/s]
|
||
Normalizing raw HH preferences (train): 78%|███████▊ | 33978/43598 [00:02<00:00, 12459.09 examples/s]
|
||
Normalizing raw HH preferences (train): 61%|██████▏ | 26708/43598 [00:02<00:01, 12018.70 examples/s]
|
||
Normalizing raw HH preferences (train): 77%|███████▋ | 33576/43598 [00:02<00:00, 12610.94 examples/s]
|
||
Normalizing raw HH preferences (train): 80%|███████▉ | 34680/43598 [00:03<00:00, 12106.67 examples/s]
|
||
Normalizing raw HH preferences (train): 80%|███████▉ | 34754/43598 [00:02<00:00, 12500.75 examples/s]
|
||
Normalizing raw HH preferences (train): 75%|███████▌ | 32718/43598 [00:02<00:00, 12467.27 examples/s]
|
||
Normalizing raw HH preferences (train): 80%|████████ | 34879/43598 [00:02<00:00, 12612.61 examples/s]
|
||
Normalizing raw HH preferences (train): 80%|███████▉ | 34869/43598 [00:02<00:00, 12700.90 examples/s]
|
||
Normalizing raw HH preferences (train): 82%|████████▏ | 35797/43598 [00:03<00:00, 12381.79 examples/s]
|
||
Normalizing raw HH preferences (train): 82%|████████▏ | 35830/43598 [00:03<00:00, 12419.37 examples/s]
|
||
Normalizing raw HH preferences (train): 78%|███████▊ | 33974/43598 [00:02<00:00, 12488.73 examples/s]
|
||
Normalizing raw HH preferences (train): 65%|██████▌ | 28486/43598 [00:02<00:01, 11950.70 examples/s]
|
||
Normalizing raw HH preferences (train): 83%|████████▎ | 36398/43598 [00:03<00:00, 11896.78 examples/s]
|
||
Normalizing raw HH preferences (train): 84%|████████▍ | 36743/43598 [00:03<00:00, 12551.23 examples/s]
|
||
Normalizing raw HH preferences (train): 84%|████████▍ | 36702/43598 [00:03<00:00, 12442.89 examples/s]
|
||
Normalizing raw HH preferences (train): 68%|██████▊ | 29748/43598 [00:02<00:01, 12124.97 examples/s]
|
||
Normalizing raw HH preferences (train): 84%|████████▍ | 36704/43598 [00:03<00:00, 12427.31 examples/s]
|
||
Normalizing raw HH preferences (train): 86%|████████▋ | 37693/43598 [00:03<00:00, 12356.83 examples/s]
|
||
Normalizing raw HH preferences (train): 86%|████████▋ | 37696/43598 [00:03<00:00, 12409.28 examples/s]
|
||
Normalizing raw HH preferences (train): 86%|████████▋ | 37681/43598 [00:03<00:00, 12037.26 examples/s]
|
||
Normalizing raw HH preferences (train): 82%|████████▏ | 35837/43598 [00:03<00:00, 12460.08 examples/s]
|
||
Normalizing raw HH preferences (train): 87%|████████▋ | 37981/43598 [00:03<00:00, 12520.65 examples/s]
|
||
Normalizing raw HH preferences (train): 71%|███████ | 30984/43598 [00:02<00:01, 12186.46 examples/s]
|
||
Normalizing raw HH preferences (train): 89%|████████▉ | 38709/43598 [00:03<00:00, 12556.27 examples/s]
|
||
Normalizing raw HH preferences (train): 87%|████████▋ | 37982/43598 [00:03<00:00, 12517.31 examples/s]
|
||
Normalizing raw HH preferences (train): 89%|████████▉ | 38951/43598 [00:03<00:00, 12404.55 examples/s]
|
||
Normalizing raw HH preferences (train): 89%|████████▉ | 38954/43598 [00:03<00:00, 12446.15 examples/s]
|
||
Normalizing raw HH preferences (train): 89%|████████▉ | 38911/43598 [00:03<00:00, 12098.25 examples/s]
|
||
Normalizing raw HH preferences (train): 86%|████████▋ | 37696/43598 [00:03<00:00, 12436.11 examples/s]
|
||
Normalizing raw HH preferences (train): 91%|█████████▏| 39840/43598 [00:03<00:00, 12473.86 examples/s]
|
||
Normalizing raw HH preferences (train): 92%|█████████▏| 39981/43598 [00:03<00:00, 12589.65 examples/s]
|
||
Normalizing raw HH preferences (train): 75%|███████▌ | 32801/43598 [00:02<00:00, 12157.72 examples/s]
|
||
Normalizing raw HH preferences (train): 91%|█████████▏| 39854/43598 [00:03<00:00, 12496.35 examples/s]
|
||
Normalizing raw HH preferences (train): 94%|█████████▎| 40774/43598 [00:03<00:00, 12317.51 examples/s]
|
||
Normalizing raw HH preferences (train): 94%|█████████▎| 40769/43598 [00:03<00:00, 12327.86 examples/s]
|
||
Normalizing raw HH preferences (train): 93%|█████████▎| 40720/43598 [00:03<00:00, 12082.09 examples/s]
|
||
Normalizing raw HH preferences (train): 91%|█████████ | 39470/43598 [00:03<00:00, 12238.66 examples/s]
|
||
Normalizing raw HH preferences (train): 96%|█████████▌| 41724/43598 [00:03<00:00, 12499.30 examples/s]
|
||
Normalizing raw HH preferences (train): 96%|█████████▌| 41866/43598 [00:03<00:00, 12578.41 examples/s]
|
||
Normalizing raw HH preferences (train): 96%|█████████▌| 41956/43598 [00:03<00:00, 12149.87 examples/s]
|
||
Normalizing raw HH preferences (train): 80%|███████▉ | 34681/43598 [00:03<00:00, 12109.22 examples/s]
|
||
Normalizing raw HH preferences (train): 96%|█████████▌| 41742/43598 [00:03<00:00, 12524.05 examples/s]
|
||
Normalizing raw HH preferences (train): 98%|█████████▊| 42689/43598 [00:03<00:00, 12330.82 examples/s]
|
||
Normalizing raw HH preferences (train): 98%|█████████▊| 42697/43598 [00:03<00:00, 12370.22 examples/s]
|
||
Normalizing raw HH preferences (train): 93%|█████████▎| 40733/43598 [00:03<00:00, 12326.93 examples/s]
|
||
Normalizing raw HH preferences (train): 99%|█████████▊| 42998/43598 [00:03<00:00, 12553.36 examples/s]
|
||
Normalizing raw HH preferences (train): 99%|█████████▊| 43000/43598 [00:03<00:00, 12353.98 examples/s]
|
||
Normalizing raw HH preferences (train): 83%|████████▎ | 36387/43598 [00:03<00:00, 11865.33 examples/s]
|
||
Normalizing raw HH preferences (train): 96%|█████████▋| 42000/43598 [00:03<00:00, 12188.39 examples/s]
|
||
Normalizing raw HH preferences (train): 99%|█████████▉| 43276/43598 [00:03<00:00, 12333.65 examples/s]
|
||
Normalizing raw HH preferences (train): 86%|████████▋ | 37679/43598 [00:03<00:00, 12011.93 examples/s]
|
||
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:03<00:00, 9197.24 examples/s]
|
||
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 8464.29 examples/s]
|
||
Normalizing raw HH preferences (train): 89%|████████▉ | 38894/43598 [00:03<00:00, 12044.56 examples/s]
|
||
Normalizing raw HH preferences (train): 93%|█████████▎| 40684/43598 [00:03<00:00, 12002.08 examples/s]
|
||
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10643.23 examples/s]
|
||
|
||
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10649.37 examples/s]
|
||
|
||
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10703.45 examples/s]
|
||
|
||
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10614.25 examples/s]
|
||
|
||
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10743.31 examples/s]
|
||
|
||
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:03<00:00, 11049.41 examples/s]
|
||
|
||
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10067.30 examples/s]
|
||
|
||
Normalizing raw HH preferences (train): 96%|█████████▌| 41913/43598 [00:03<00:00, 12073.11 examples/s]
|
||
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:03<00:00, 10687.60 examples/s]
|
||
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:04<00:00, 10872.14 examples/s]
|
||
|
||
Normalizing raw HH preferences (test): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (test): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (test): 0%| | 0/2339 [00:00<?, ? examples/s]2026-04-10 17:20:37 - WARNING - __main__ - Dropped 15 non-canonical HH preference examples from split `test` before normalization (9 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 6 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
|
||
|
||
Normalizing raw HH preferences (test): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (test): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (test): 50%|█████ | 1174/2339 [00:00<00:00, 11692.34 examples/s]
|
||
Normalizing raw HH preferences (test): 51%|█████ | 1193/2339 [00:00<00:00, 11881.37 examples/s]
|
||
Normalizing raw HH preferences (test): 51%|█████ | 1194/2339 [00:00<00:00, 11895.80 examples/s]
|
||
Normalizing raw HH preferences (test): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (test): 48%|████▊ | 1129/2339 [00:00<00:00, 11241.39 examples/s]
|
||
Normalizing raw HH preferences (test): 50%|█████ | 1176/2339 [00:00<00:00, 11714.54 examples/s]
|
||
Normalizing raw HH preferences (test): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10833.69 examples/s]
|
||
|
||
Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10813.29 examples/s]
|
||
|
||
Normalizing raw HH preferences (test): 50%|████▉ | 1169/2339 [00:00<00:00, 11646.00 examples/s]
|
||
Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10954.81 examples/s]
|
||
|
||
Normalizing raw HH preferences (test): 100%|█████████▉| 2334/2339 [00:00<00:00, 11715.48 examples/s]
|
||
Normalizing raw HH preferences (test): 47%|████▋ | 1094/2339 [00:00<00:00, 10894.27 examples/s]
|
||
Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10380.69 examples/s]
|
||
|
||
Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10831.75 examples/s]
|
||
2026-04-10 17:20:38 - INFO - __main__ - Training on the following splits: ['train : 43598', 'test : 2339']
|
||
[INFO|tokenization_utils_base.py:2058] 2026-04-10 17:20:38,197 >> loading file tokenizer.json
|
||
[INFO|tokenization_utils_base.py:2058] 2026-04-10 17:20:38,197 >> loading file tokenizer.model
|
||
[INFO|tokenization_utils_base.py:2058] 2026-04-10 17:20:38,197 >> loading file added_tokens.json
|
||
[INFO|tokenization_utils_base.py:2058] 2026-04-10 17:20:38,197 >> loading file special_tokens_map.json
|
||
[INFO|tokenization_utils_base.py:2058] 2026-04-10 17:20:38,197 >> loading file tokenizer_config.json
|
||
[INFO|tokenization_utils_base.py:2058] 2026-04-10 17:20:38,197 >> loading file chat_template.jinja
|
||
|
||
Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 11420.15 examples/s]
|
||
Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10759.00 examples/s]
|
||
|
||
Normalizing raw HH preferences (test): 100%|█████████▉| 2334/2339 [00:00<00:00, 11771.19 examples/s]
|
||
Normalizing raw HH preferences (test): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10444.53 examples/s]
|
||
|
||
Normalizing raw HH preferences (test): 46%|████▌ | 1066/2339 [00:00<00:00, 10605.27 examples/s]
|
||
Normalizing raw HH preferences (test): 97%|█████████▋| 2266/2339 [00:00<00:00, 11418.05 examples/s]
|
||
Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 9536.22 examples/s]
|
||
[INFO|tokenization_utils_base.py:2323] 2026-04-10 17:20:38,599 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 111/43598 [00:00<05:24, 134.04 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 71/43598 [00:00<08:51, 81.88 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 88/43598 [00:00<07:48, 92.97 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 20/43598 [00:00<36:17, 20.01 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 1%| | 253/43598 [00:01<02:39, 272.28 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 54/43598 [00:01<13:51, 52.38 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 1%| | 386/43598 [00:01<01:36, 448.36 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 1%| | 448/43598 [00:01<01:28, 490.13 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 3%|▎ | 1123/43598 [00:01<00:35, 1189.96 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 1%|▏ | 593/43598 [00:01<01:13, 587.46 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 3%|▎ | 1209/43598 [00:01<00:32, 1319.20 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 9/43598 [00:01<1:39:10, 7.33 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 111/43598 [00:01<07:51, 92.16 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 5%|▌ | 2194/43598 [00:01<00:20, 2047.85 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 2%|▏ | 973/43598 [00:01<00:51, 827.40 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 3%|▎ | 1184/43598 [00:01<00:40, 1040.63 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 4%|▍ | 1745/43598 [00:01<00:29, 1432.91 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 6%|▌ | 2698/43598 [00:01<00:17, 2358.89 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 2%|▏ | 1081/43598 [00:01<00:48, 871.00 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 9%|▉ | 3879/43598 [00:01<00:10, 3652.47 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 2%|▏ | 784/43598 [00:01<01:14, 571.89 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 39/43598 [00:01<25:08, 28.88 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 7%|▋ | 3111/43598 [00:01<00:15, 2614.89 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 12%|█▏ | 5234/43598 [00:02<00:10, 3658.84 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 7%|▋ | 3123/43598 [00:02<00:18, 2172.88 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 14%|█▍ | 6273/43598 [00:02<00:07, 4691.64 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 9%|▊ | 3740/43598 [00:02<00:15, 2529.80 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 9%|▊ | 3779/43598 [00:02<00:17, 2317.11 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 10%|▉ | 4225/43598 [00:02<00:14, 2717.42 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 1%| | 419/43598 [00:01<02:27, 291.94 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 5%|▌ | 2383/43598 [00:02<00:27, 1513.13 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 20%|██ | 8876/43598 [00:02<00:06, 5594.78 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 18%|█▊ | 7803/43598 [00:02<00:07, 4488.83 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 15%|█▌ | 6742/43598 [00:02<00:09, 3995.82 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 20%|█▉ | 8649/43598 [00:02<00:07, 4894.99 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 14%|█▍ | 6052/43598 [00:02<00:11, 3276.69 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 17%|█▋ | 7206/43598 [00:02<00:09, 3973.22 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 26%|██▌ | 11260/43598 [00:02<00:05, 6259.54 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 4%|▍ | 1646/43598 [00:02<00:42, 991.02 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 10%|█ | 4499/43598 [00:02<00:16, 2343.48 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 24%|██▍ | 10501/43598 [00:03<00:06, 4806.03 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 32%|███▏ | 13948/43598 [00:03<00:03, 7731.25 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 31%|███ | 13571/43598 [00:03<00:04, 6149.21 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 21%|██ | 9046/43598 [00:03<00:08, 4287.23 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 30%|██▉ | 12987/43598 [00:03<00:05, 5991.64 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 40%|███▉ | 17286/43598 [00:03<00:02, 9446.86 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 23%|██▎ | 10204/43598 [00:03<00:07, 4709.76 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 24%|██▍ | 10466/43598 [00:03<00:07, 4499.01 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 35%|███▌ | 15367/43598 [00:03<00:03, 7096.68 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 11%|█ | 4737/43598 [00:02<00:15, 2524.92 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 43%|████▎ | 18811/43598 [00:03<00:02, 9480.48 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 26%|██▌ | 11364/43598 [00:03<00:06, 5103.28 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 17%|█▋ | 7255/43598 [00:03<00:11, 3152.70 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 38%|███▊ | 16533/43598 [00:03<00:03, 7290.04 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 46%|████▋ | 20178/43598 [00:03<00:02, 9749.45 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 49%|████▉ | 21417/43598 [00:03<00:02, 9602.01 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 40%|████ | 17648/43598 [00:03<00:03, 7311.85 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 43%|████▎ | 18713/43598 [00:03<00:03, 7811.21 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 52%|█████▏ | 22583/43598 [00:03<00:02, 9662.24 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 32%|███▏ | 14130/43598 [00:03<00:05, 5518.65 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 34%|███▎ | 14656/43598 [00:03<00:05, 5737.57 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 31%|███ | 13382/43598 [00:03<00:06, 5033.35 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 45%|████▌ | 19745/43598 [00:03<00:02, 8250.28 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 54%|█████▍ | 23759/43598 [00:03<00:01, 9952.46 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 19%|█▊ | 8109/43598 [00:03<00:09, 3817.06 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 36%|███▋ | 15872/43598 [00:03<00:05, 5223.73 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 45%|████▍ | 19433/43598 [00:03<00:02, 9187.86 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 39%|███▉ | 16951/43598 [00:03<00:03, 7895.79 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 48%|████▊ | 20744/43598 [00:03<00:02, 8539.88 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 57%|█████▋ | 24920/43598 [00:04<00:01, 9547.69 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 22%|██▏ | 9628/43598 [00:03<00:10, 3357.62 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 50%|█████ | 21840/43598 [00:04<00:02, 9054.93 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 49%|████▉ | 21375/43598 [00:04<00:02, 9382.87 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 42%|████▏ | 18372/43598 [00:04<00:03, 7946.43 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 60%|█████▉ | 26008/43598 [00:04<00:01, 9289.12 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 53%|█████▎ | 22910/43598 [00:04<00:02, 9425.67 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 62%|██████▏ | 27004/43598 [00:04<00:01, 8777.91 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 55%|█████▍ | 23951/43598 [00:04<00:02, 9658.18 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 45%|████▍ | 19615/43598 [00:04<00:03, 7967.79 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 53%|█████▎ | 23034/43598 [00:04<00:02, 9177.15 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 39%|███▉ | 17201/43598 [00:04<00:04, 5475.53 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 25%|██▍ | 10683/43598 [00:03<00:07, 4247.62 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 64%|██████▍ | 27995/43598 [00:04<00:01, 8731.17 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 57%|█████▋ | 24978/43598 [00:04<00:02, 9197.48 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 44%|████▍ | 19178/43598 [00:04<00:04, 5661.05 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 48%|████▊ | 20783/43598 [00:04<00:02, 7791.23 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 48%|████▊ | 20728/43598 [00:04<00:03, 7598.25 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 56%|█████▌ | 24442/43598 [00:04<00:02, 9083.46 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 66%|██████▋ | 28923/43598 [00:04<00:01, 8633.22 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 30%|██▉ | 12954/43598 [00:04<00:06, 4447.34 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 60%|█████▉ | 25993/43598 [00:04<00:01, 9098.05 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 49%|████▉ | 21566/43598 [00:04<00:03, 6860.75 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 50%|████▉ | 21724/43598 [00:04<00:02, 7677.33 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 69%|██████▊ | 29871/43598 [00:04<00:01, 8698.88 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 62%|██████▏ | 26956/43598 [00:04<00:01, 8788.54 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 59%|█████▉ | 25687/43598 [00:04<00:02, 8853.45 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 71%|███████ | 30814/43598 [00:04<00:01, 8890.21 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 52%|█████▏ | 22718/43598 [00:04<00:02, 7741.70 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 32%|███▏ | 13773/43598 [00:04<00:05, 5320.78 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 64%|██████▍ | 27938/43598 [00:04<00:01, 9034.85 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 61%|██████▏ | 26806/43598 [00:04<00:01, 9176.59 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 52%|█████▏ | 22567/43598 [00:04<00:03, 6371.87 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 51%|█████ | 22238/43598 [00:04<00:03, 6762.76 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 54%|█████▍ | 23657/43598 [00:04<00:02, 7962.25 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 73%|███████▎ | 31839/43598 [00:04<00:01, 8942.39 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 66%|██████▋ | 28934/43598 [00:04<00:01, 9273.48 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 64%|██████▍ | 28096/43598 [00:04<00:01, 9761.71 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 75%|███████▌ | 32854/43598 [00:04<00:01, 9224.63 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 69%|██████▊ | 29914/43598 [00:04<00:01, 9357.97 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 54%|█████▎ | 23425/43598 [00:04<00:03, 6115.44 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 56%|█████▋ | 24610/43598 [00:04<00:02, 7669.28 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 67%|██████▋ | 29269/43598 [00:04<00:01, 10061.98 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 54%|█████▎ | 23345/43598 [00:04<00:03, 6671.75 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 38%|███▊ | 16696/43598 [00:04<00:04, 5399.86 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 78%|███████▊ | 33930/43598 [00:05<00:01, 9602.52 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 71%|███████▏ | 31146/43598 [00:05<00:01, 10161.96 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 70%|██████▉ | 30469/43598 [00:04<00:01, 10487.59 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 55%|█████▌ | 24139/43598 [00:05<00:03, 6118.78 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 58%|█████▊ | 25436/43598 [00:05<00:02, 7451.41 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 47%|████▋ | 20703/43598 [00:04<00:02, 8123.68 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 56%|█████▌ | 24392/43598 [00:05<00:02, 6779.13 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 80%|████████ | 34989/43598 [00:05<00:00, 9826.71 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 74%|███████▍ | 32191/43598 [00:05<00:01, 9964.25 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 60%|██████ | 26337/43598 [00:05<00:02, 7785.16 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 57%|█████▋ | 24851/43598 [00:05<00:03, 6183.02 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 38%|███▊ | 16558/43598 [00:04<00:04, 5471.50 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 73%|███████▎ | 31665/43598 [00:05<00:01, 9765.78 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 83%|████████▎ | 36248/43598 [00:05<00:00, 10175.51 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 77%|███████▋ | 33476/43598 [00:05<00:00, 10643.44 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 60%|█████▉ | 25965/43598 [00:05<00:02, 7038.14 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 63%|██████▎ | 27281/43598 [00:05<00:02, 8129.48 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 58%|█████▊ | 25354/43598 [00:05<00:02, 6638.11 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 51%|█████▏ | 22432/43598 [00:05<00:02, 8270.78 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 47%|████▋ | 20670/43598 [00:04<00:02, 8511.38 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 75%|███████▌ | 32756/43598 [00:05<00:01, 8923.69 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 85%|████████▌ | 37275/43598 [00:05<00:00, 9750.34 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 80%|████████ | 35043/43598 [00:05<00:00, 12066.93 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 62%|██████▏ | 27102/43598 [00:05<00:02, 7891.56 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 65%|██████▌ | 28373/43598 [00:05<00:01, 8744.79 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 60%|██████ | 26204/43598 [00:05<00:02, 6801.04 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 55%|█████▌ | 23979/43598 [00:05<00:02, 8462.28 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 88%|████████▊ | 38280/43598 [00:05<00:00, 9533.40 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 78%|███████▊ | 33842/43598 [00:05<00:01, 9056.87 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 83%|████████▎ | 36329/43598 [00:05<00:00, 11625.23 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 51%|█████▏ | 22364/43598 [00:05<00:02, 8623.37 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 64%|██████▍ | 28045/43598 [00:05<00:01, 8021.08 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 62%|██████▏ | 27042/43598 [00:05<00:02, 7071.29 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 68%|██████▊ | 29469/43598 [00:05<00:01, 9046.33 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 58%|█████▊ | 25273/43598 [00:05<00:02, 8901.42 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 86%|████████▌ | 37534/43598 [00:05<00:00, 11324.90 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 90%|█████████ | 39273/43598 [00:05<00:00, 8992.73 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 80%|███████▉ | 34831/43598 [00:05<00:01, 8689.97 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 67%|██████▋ | 29157/43598 [00:05<00:01, 8618.13 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 64%|██████▍ | 27980/43598 [00:05<00:02, 7477.03 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 71%|███████ | 30872/43598 [00:05<00:01, 10298.57 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 61%|██████ | 26636/43598 [00:05<00:01, 9643.35 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 55%|█████▍ | 23821/43598 [00:05<00:02, 8501.87 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 70%|██████▉ | 30446/43598 [00:05<00:01, 9644.88 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 89%|████████▉ | 38703/43598 [00:05<00:00, 10999.91 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 67%|██████▋ | 29421/43598 [00:05<00:01, 8933.52 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 74%|███████▍ | 32330/43598 [00:05<00:00, 11353.30 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 82%|████████▏ | 35745/43598 [00:05<00:00, 8447.90 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 92%|█████████▏| 40296/43598 [00:05<00:00, 8367.69 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 64%|██████▍ | 27935/43598 [00:05<00:01, 10253.06 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 72%|███████▏ | 31528/43598 [00:05<00:01, 9823.42 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 72%|███████▏ | 31242/43598 [00:05<00:01, 11158.65 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 91%|█████████▏| 39825/43598 [00:05<00:00, 10799.25 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 77%|███████▋ | 33731/43598 [00:05<00:00, 11745.47 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 84%|████████▍ | 36715/43598 [00:05<00:00, 8356.99 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 58%|█████▊ | 25076/43598 [00:05<00:02, 8210.35 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 67%|██████▋ | 29234/43598 [00:05<00:01, 10476.99 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 95%|█████████▍| 41250/43598 [00:05<00:00, 7405.98 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 75%|███████▍ | 32603/43598 [00:05<00:01, 10007.18 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 76%|███████▌ | 32953/43598 [00:05<00:00, 12669.43 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 81%|████████ | 35109/43598 [00:05<00:00, 12219.90 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 94%|█████████▍| 41030/43598 [00:05<00:00, 10754.02 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 87%|████████▋ | 37816/43598 [00:05<00:00, 9026.87 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 60%|██████ | 26256/43598 [00:05<00:01, 8752.78 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 70%|███████ | 30555/43598 [00:05<00:01, 10902.66 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 79%|███████▉ | 34430/43598 [00:06<00:00, 13220.91 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 77%|███████▋ | 33718/43598 [00:06<00:00, 10286.80 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 89%|████████▉ | 38979/43598 [00:05<00:00, 9713.25 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 84%|████████▎ | 36444/43598 [00:06<00:00, 11584.04 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 63%|██████▎ | 27431/43598 [00:05<00:01, 9096.64 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 97%|█████████▋| 42183/43598 [00:06<00:00, 9619.50 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 73%|███████▎ | 31790/43598 [00:05<00:01, 11211.35 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 96%|█████████▋| 42069/43598 [00:06<00:00, 6213.74 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 81%|████████ | 35181/43598 [00:06<00:00, 11506.62 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 82%|████████▏ | 35938/43598 [00:06<00:00, 12971.73 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 92%|█████████▏| 40145/43598 [00:06<00:00, 10224.21 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 66%|██████▌ | 28621/43598 [00:05<00:01, 9383.75 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 87%|████████▋ | 37748/43598 [00:06<00:00, 11311.82 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 77%|███████▋ | 33567/43598 [00:06<00:00, 12640.71 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 84%|████████▍ | 36603/43598 [00:06<00:00, 12270.19 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 86%|████████▌ | 37448/43598 [00:06<00:00, 12950.20 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 98%|█████████▊| 42811/43598 [00:06<00:00, 5652.10 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 69%|██████▉ | 30182/43598 [00:05<00:01, 10731.29 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 99%|█████████▉| 43239/43598 [00:06<00:00, 7947.40 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 95%|█████████▍| 41229/43598 [00:06<00:00, 9360.57 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 89%|████████▉ | 39016/43598 [00:06<00:00, 11175.35 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 80%|████████ | 35074/43598 [00:06<00:00, 13009.99 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 87%|████████▋ | 37964/43598 [00:06<00:00, 12536.51 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 89%|████████▉ | 38907/43598 [00:06<00:00, 12725.89 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 73%|███████▎ | 31810/43598 [00:05<00:00, 12039.59 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 92%|█████████▏| 40316/43598 [00:06<00:00, 11634.82 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 84%|████████▍ | 36736/43598 [00:06<00:00, 13850.17 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 90%|█████████ | 39243/43598 [00:06<00:00, 12605.61 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|█████████▉| 43508/43598 [00:06<00:00, 5359.39 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 97%|█████████▋| 42205/43598 [00:06<00:00, 8345.57 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 77%|███████▋ | 33486/43598 [00:06<00:00, 13216.90 examples/s]Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs53bc9c1b4b841a8900001c1e'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 93%|█████████▎| 40361/43598 [00:06<00:00, 12420.89 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 43598/43598 [00:06<00:00, 6716.54 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 96%|█████████▌| 41666/43598 [00:06<00:00, 11745.61 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 88%|████████▊ | 38205/43598 [00:06<00:00, 14074.36 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 93%|█████████▎| 40719/43598 [00:06<00:00, 12948.90 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 81%|████████ | 35269/43598 [00:06<00:00, 14432.57 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 96%|█████████▌| 41823/43598 [00:06<00:00, 12741.68 examples/s]Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs7b84796a9fd69ee900001c25'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 99%|█████████▉| 43124/43598 [00:06<00:00, 7076.73 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 43598/43598 [00:06<00:00, 6542.53 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 91%|█████████ | 39740/43598 [00:06<00:00, 12851.81 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 86%|████████▌ | 37499/43598 [00:06<00:00, 16349.21 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 99%|█████████▊| 42946/43598 [00:06<00:00, 9679.67 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 97%|█████████▋| 42113/43598 [00:06<00:00, 10479.50 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 94%|█████████▍| 41100/43598 [00:06<00:00, 12194.22 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 90%|█████████ | 39307/43598 [00:06<00:00, 15947.60 examples/s]Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfsb37b9e4888f2c94c00001c31'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 43598/43598 [00:06<00:00, 6403.75 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 99%|█████████▉| 43183/43598 [00:06<00:00, 8508.64 examples/s] Traceback (most recent call last):
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 97%|█████████▋| 42405/43598 [00:06<00:00, 11443.72 examples/s] File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs3f877014c1ec9e2300001c32'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 43598/43598 [00:06<00:00, 6271.13 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 94%|█████████▍| 41028/43598 [00:06<00:00, 14625.68 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|█████████▉| 43493/43598 [00:07<00:00, 7322.98 examples/s] Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs68e3b5ca8fddee3300001c38'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 43598/43598 [00:07<00:00, 6125.00 examples/s]
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs53c351a77cd48be800001c39'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 43598/43598 [00:07<00:00, 8489.03 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 43598/43598 [00:07<00:00, 6064.07 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 98%|█████████▊| 42719/43598 [00:06<00:00, 10914.69 examples/s]Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfse2d4363274eb081700001c3b'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 43598/43598 [00:07<00:00, 6120.16 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/2339 [00:00<?, ? examples/s]Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs3a3afdae81875d9a00001c3d'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 43598/43598 [00:07<00:00, 6150.20 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 8%|▊ | 195/2339 [00:00<00:08, 249.99 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 7%|▋ | 155/2339 [00:00<00:13, 161.72 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 14%|█▍ | 333/2339 [00:01<00:06, 331.56 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 14%|█▍ | 333/2339 [00:01<00:06, 299.06 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 4%|▍ | 104/2339 [00:00<00:19, 117.59 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 19%|█▊ | 434/2339 [00:01<00:05, 371.39 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 19%|█▊ | 433/2339 [00:01<00:05, 345.35 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 8%|▊ | 195/2339 [00:01<00:10, 201.45 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 2%|▏ | 53/2339 [00:01<00:47, 48.16 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 3%|▎ | 72/2339 [00:00<00:28, 78.31 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 27%|██▋ | 629/2339 [00:01<00:03, 523.85 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 14%|█▍ | 324/2339 [00:01<00:06, 296.55 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 26%|██▌ | 613/2339 [00:01<00:04, 419.27 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 19%|█▊ | 436/2339 [00:01<00:04, 419.81 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 8%|▊ | 179/2339 [00:01<00:12, 171.74 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 2%|▏ | 51/2339 [00:01<00:50, 45.00 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 2%|▏ | 48/2339 [00:01<00:49, 45.93 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 42%|████▏ | 975/2339 [00:01<00:01, 778.67 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 25%|██▌ | 585/2339 [00:01<00:03, 546.03 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 48%|████▊ | 1128/2339 [00:01<00:01, 899.97 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 24%|██▍ | 561/2339 [00:01<00:03, 451.08 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 10%|█ | 240/2339 [00:01<00:09, 215.55 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 10%|▉ | 228/2339 [00:01<00:09, 225.68 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 33%|███▎ | 780/2339 [00:01<00:02, 630.78 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 56%|█████▌ | 1302/2339 [00:02<00:01, 809.69 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 11%|█ | 254/2339 [00:01<00:12, 163.78 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 52%|█████▏ | 1225/2339 [00:02<00:01, 693.84 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 17%|█▋ | 390/2339 [00:01<00:05, 347.75 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 19%|█▊ | 435/2339 [00:01<00:05, 358.05 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 35%|███▌ | 827/2339 [00:01<00:02, 530.69 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 68%|██████▊ | 1591/2339 [00:02<00:00, 1040.28 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 62%|██████▏ | 1447/2339 [00:02<00:01, 756.05 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 25%|██▌ | 592/2339 [00:01<00:03, 484.96 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 44%|████▎ | 1023/2339 [00:02<00:02, 619.57 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 25%|██▌ | 586/2339 [00:01<00:04, 413.17 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 36%|███▌ | 831/2339 [00:02<00:02, 633.26 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 46%|████▌ | 1074/2339 [00:02<00:01, 656.67 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 3%|▎ | 68/2339 [00:01<00:50, 45.15 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 69%|██████▉ | 1624/2339 [00:02<00:00, 807.96 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 33%|███▎ | 780/2339 [00:01<00:02, 602.51 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 54%|█████▎ | 1253/2339 [00:02<00:01, 734.71 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 75%|███████▌ | 1757/2339 [00:02<00:00, 815.24 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 47%|████▋ | 1096/2339 [00:02<00:01, 770.09 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 77%|███████▋ | 1806/2339 [00:02<00:00, 854.03 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 54%|█████▎ | 1252/2339 [00:02<00:01, 699.17 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 17%|█▋ | 391/2339 [00:01<00:06, 293.25 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 35%|███▌ | 824/2339 [00:02<00:02, 536.59 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 85%|████████▍ | 1982/2339 [00:02<00:00, 939.15 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 40%|████ | 945/2339 [00:02<00:02, 630.65 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 59%|█████▊ | 1370/2339 [00:02<00:01, 659.56 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 90%|████████▉ | 2096/2339 [00:03<00:00, 1006.58 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 60%|██████ | 1415/2339 [00:02<00:01, 718.48 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 96%|█████████▌| 2234/2339 [00:03<00:00, 1112.16 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 75%|███████▌ | 1755/2339 [00:02<00:00, 1080.77 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 27%|██▋ | 625/2339 [00:01<00:04, 425.73 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 43%|████▎ | 998/2339 [00:02<00:02, 576.18 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 50%|█████ | 1170/2339 [00:02<00:01, 781.43 examples/s]Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs926b8a223a5cf1a600001c71'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 737.85 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 56%|█████▌ | 1301/2339 [00:02<00:01, 666.04 examples/s]/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
|
||
warnings.warn(
|
||
[WARNING|logging.py:328] 2026-04-10 17:20:49,022 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 97%|█████████▋| 2258/2339 [00:03<00:00, 845.34 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 56%|█████▌ | 1304/2339 [00:02<00:01, 761.90 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 39%|███▉ | 918/2339 [00:02<00:02, 626.02 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 54%|█████▍ | 1274/2339 [00:02<00:01, 734.16 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 71%|███████ | 1663/2339 [00:02<00:00, 727.41 examples/s]
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 558.00it/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 83%|████████▎ | 1950/2339 [00:03<00:00, 935.72 examples/s] Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfse8192e985f4610ed00001c7a'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 64%|██████▍ | 1496/2339 [00:02<00:01, 720.45 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 682.72 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 83%|████████▎ | 1950/2339 [00:03<00:00, 997.67 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 48%|████▊ | 1132/2339 [00:02<00:01, 728.77 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 67%|██████▋ | 1561/2339 [00:02<00:00, 916.84 examples/s]2026-04-10 17:20:49 - INFO - __main__ - Processed train sample 41905:
|
||
|
||
Prompt:
|
||
<|begin_of_text|><|start_header_id|>user<|end_header_id|>
|
||
|
||
What is the best way to clean my refrigerator?<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
||
|
||
Well, I have a few ideas, but the best way to know what works best is to run some tests and then analyze the data to figure out which technique works best.<|eot_id|><|start_header_id|>user<|end_header_id|>
|
||
|
||
What are your ideas?<|eot_id|>
|
||
|
||
Chosen:
|
||
<|start_header_id|>assistant<|end_header_id|>
|
||
|
||
Some of my ideas include wiping down the interior with a rag, using bleach and water to wipe down the door, emptying the contents of the fridge to scrub the shelves with a clean cloth, cleaning the interior of the fridge with vinegar and water, and using a vacuum cleaner to clean the interior.<|eot_id|>
|
||
|
||
Rejected:
|
||
<|start_header_id|>assistant<|end_header_id|>
|
||
|
||
I was thinking that you could try different types of soap, which could be a variation of “soap,” like antibacterial soap. I was also thinking that you could try using different tools, like a toothbrush, to clean the inside. Or I was thinking that you could try different ways to get the gunk out, like using a squeegee, or using a paper towel. I was also thinking you could try other types of cleaning, like vacuuming, but I think that could have the opposite of the desired effect.<|eot_id|>
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
|
||
warnings.warn(
|
||
[INFO|configuration_utils.py:691] 2026-04-10 17:20:49,264 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/config.json
|
||
[INFO|configuration_utils.py:765] 2026-04-10 17:20:49,265 >> Model config LlamaConfig {
|
||
"architectures": [
|
||
"LlamaForCausalLM"
|
||
],
|
||
"attention_bias": false,
|
||
"attention_dropout": 0.0,
|
||
"bos_token_id": 128000,
|
||
"eos_token_id": 128001,
|
||
"head_dim": 128,
|
||
"hidden_act": "silu",
|
||
"hidden_size": 4096,
|
||
"initializer_range": 0.02,
|
||
"intermediate_size": 14336,
|
||
"max_position_embeddings": 8192,
|
||
"mlp_bias": false,
|
||
"model_type": "llama",
|
||
"num_attention_heads": 32,
|
||
"num_hidden_layers": 32,
|
||
"num_key_value_heads": 8,
|
||
"pretraining_tp": 1,
|
||
"rms_norm_eps": 1e-05,
|
||
"rope_scaling": null,
|
||
"rope_theta": 500000.0,
|
||
"tie_word_embeddings": false,
|
||
"torch_dtype": "bfloat16",
|
||
"transformers_version": "4.51.0",
|
||
"use_cache": false,
|
||
"vocab_size": 128256
|
||
}
|
||
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 67%|██████▋ | 1560/2339 [00:02<00:00, 898.19 examples/s][INFO|modeling_utils.py:1121] 2026-04-10 17:20:49,278 >> loading weights file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/model.safetensors.index.json
|
||
[INFO|modeling_utils.py:2167] 2026-04-10 17:20:49,279 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
|
||
[WARNING|logging.py:328] 2026-04-10 17:20:49,281 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
|
||
[INFO|configuration_utils.py:1142] 2026-04-10 17:20:49,283 >> Generate config GenerationConfig {
|
||
"bos_token_id": 128000,
|
||
"eos_token_id": 128001,
|
||
"use_cache": false
|
||
}
|
||
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 72%|███████▏ | 1673/2339 [00:03<00:00, 789.71 examples/s]
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 91%|█████████▏| 2140/2339 [00:03<00:00, 1044.83 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 90%|████████▉ | 2096/2339 [00:03<00:00, 820.27 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 75%|███████▌ | 1755/2339 [00:03<00:00, 1039.35 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 57%|█████▋ | 1339/2339 [00:02<00:01, 808.95 examples/s]
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 640.90it/s]
|
||
[WARNING|trainer.py:821] 2026-04-10 17:20:49,444 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 81%|████████ | 1894/2339 [00:02<00:00, 1093.28 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|█████████▉| 2331/2339 [00:03<00:00, 1038.87 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 81%|████████ | 1883/2339 [00:03<00:00, 894.47 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 99%|█████████▊| 2308/2339 [00:03<00:00, 1057.74 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 67%|██████▋ | 1560/2339 [00:02<00:00, 993.35 examples/s]Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs4ee5c347154bfc4e00001c88'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 667.93 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 90%|█████████ | 2116/2339 [00:03<00:00, 1069.99 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 83%|████████▎ | 1950/2339 [00:03<00:00, 968.28 examples/s] Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs88b0aa9233adc5a400001c8a'
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
|
||
warnings.warn(
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 665.59 examples/s]
|
||
[WARNING|logging.py:328] 2026-04-10 17:20:49,650 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 75%|███████▌ | 1755/2339 [00:02<00:00, 1052.98 examples/s]
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 92%|█████████▏| 2145/2339 [00:03<00:00, 1067.72 examples/s]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 828.47it/s]
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
|
||
warnings.warn(
|
||
[WARNING|logging.py:328] 2026-04-10 17:20:49,728 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
|
||
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 92%|█████████▏| 2145/2339 [00:03<00:00, 1041.56 examples/s]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 702.31it/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 1113.18 examples/s]
|
||
Formatting comparisons with prompt template (num_proc=12): 83%|████████▎ | 1950/2339 [00:03<00:00, 1108.25 examples/s]Traceback (most recent call last):
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs40aab1e2a121061e00001c92'
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs2152ef03de76500b00001c93'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 654.40 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 691.77 examples/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 99%|█████████▊| 2306/2339 [00:03<00:00, 1041.41 examples/s]
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 231.56it/s]
|
||
[WARNING|trainer.py:821] 2026-04-10 17:20:49,977 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
|
||
warnings.warn(
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
|
||
warnings.warn(
|
||
[WARNING|logging.py:328] 2026-04-10 17:20:50,019 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
|
||
[WARNING|logging.py:328] 2026-04-10 17:20:50,019 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
|
||
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs78cc63d12c66250200001c95'
|
||
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 656.99it/s]
|
||
[WARNING|trainer.py:821] 2026-04-10 17:20:50,040 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 100%|██████████| 2339/2339 [00:03<00:00, 637.51 examples/s]
|
||
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 701.64it/s]
|
||
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 568.94it/s]
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 92%|█████████▏| 2144/2339 [00:03<00:00, 978.77 examples/s] /home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
|
||
warnings.warn(
|
||
[WARNING|logging.py:328] 2026-04-10 17:20:50,131 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
|
||
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 887.17it/s]
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs2b8fe5e9b3e8c5be00001c97'
|
||
|
||
Formatting comparisons with prompt template (num_proc=12): 92%|█████████▏| 2144/2339 [00:03<00:00, 638.84 examples/s]
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
|
||
warnings.warn(
|
||
[WARNING|logging.py:328] 2026-04-10 17:20:50,273 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
|
||
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 849.96it/s]
|
||
[WARNING|trainer.py:821] 2026-04-10 17:20:50,315 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
|
||
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 539.02it/s]
|
||
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 962.94it/s]
|
||
[WARNING|trainer.py:821] 2026-04-10 17:20:50,363 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
|
||
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 959.76it/s]
|
||
[WARNING|trainer.py:821] 2026-04-10 17:20:50,427 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
|
||
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:00<00:00, 993.71it/s]
|
||
[WARNING|trainer.py:821] 2026-04-10 17:20:50,542 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
|
||
|
||
Loading checkpoint shards: 14%|█▍ | 1/7 [00:01<00:09, 1.65s/it]
|
||
Loading checkpoint shards: 29%|██▊ | 2/7 [00:03<00:07, 1.53s/it]
|
||
Loading checkpoint shards: 43%|████▎ | 3/7 [00:04<00:06, 1.54s/it]
|
||
Loading checkpoint shards: 57%|█████▋ | 4/7 [00:06<00:04, 1.53s/it]
|
||
Loading checkpoint shards: 71%|███████▏ | 5/7 [00:07<00:03, 1.52s/it]
|
||
Loading checkpoint shards: 86%|████████▌ | 6/7 [00:09<00:01, 1.50s/it]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:09<00:00, 1.27s/it]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:09<00:00, 1.42s/it]
|
||
[INFO|modeling_utils.py:4926] 2026-04-10 17:20:59,246 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
|
||
|
||
[INFO|modeling_utils.py:4934] 2026-04-10 17:20:59,246 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758.
|
||
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
|
||
[INFO|configuration_utils.py:1095] 2026-04-10 17:20:59,248 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/generation_config.json
|
||
[INFO|configuration_utils.py:1142] 2026-04-10 17:20:59,248 >> Generate config GenerationConfig {
|
||
"bos_token_id": 128000,
|
||
"do_sample": true,
|
||
"eos_token_id": 128001,
|
||
"max_length": 4096,
|
||
"temperature": 0.6,
|
||
"top_p": 0.9
|
||
}
|
||
|
||
[INFO|configuration_utils.py:691] 2026-04-10 17:20:59,250 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/config.json
|
||
[INFO|configuration_utils.py:765] 2026-04-10 17:20:59,250 >> Model config LlamaConfig {
|
||
"architectures": [
|
||
"LlamaForCausalLM"
|
||
],
|
||
"attention_bias": false,
|
||
"attention_dropout": 0.0,
|
||
"bos_token_id": 128000,
|
||
"eos_token_id": 128001,
|
||
"head_dim": 128,
|
||
"hidden_act": "silu",
|
||
"hidden_size": 4096,
|
||
"initializer_range": 0.02,
|
||
"intermediate_size": 14336,
|
||
"max_position_embeddings": 8192,
|
||
"mlp_bias": false,
|
||
"model_type": "llama",
|
||
"num_attention_heads": 32,
|
||
"num_hidden_layers": 32,
|
||
"num_key_value_heads": 8,
|
||
"pretraining_tp": 1,
|
||
"rms_norm_eps": 1e-05,
|
||
"rope_scaling": null,
|
||
"rope_theta": 500000.0,
|
||
"tie_word_embeddings": false,
|
||
"torch_dtype": "bfloat16",
|
||
"transformers_version": "4.51.0",
|
||
"use_cache": false,
|
||
"vocab_size": 128256
|
||
}
|
||
|
||
[INFO|modeling_utils.py:1121] 2026-04-10 17:20:59,251 >> loading weights file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/model.safetensors.index.json
|
||
[INFO|modeling_utils.py:2167] 2026-04-10 17:20:59,252 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
|
||
[INFO|configuration_utils.py:1142] 2026-04-10 17:20:59,254 >> Generate config GenerationConfig {
|
||
"bos_token_id": 128000,
|
||
"eos_token_id": 128001,
|
||
"use_cache": false
|
||
}
|
||
|
||
|
||
Loading checkpoint shards: 0%| | 0/7 [00:00<?, ?it/s]
|
||
Loading checkpoint shards: 14%|█▍ | 1/7 [00:01<00:09, 1.53s/it]
|
||
Loading checkpoint shards: 29%|██▊ | 2/7 [00:02<00:07, 1.47s/it]
|
||
Loading checkpoint shards: 43%|████▎ | 3/7 [00:04<00:05, 1.48s/it]
|
||
Loading checkpoint shards: 57%|█████▋ | 4/7 [00:05<00:04, 1.48s/it]
|
||
Loading checkpoint shards: 71%|███████▏ | 5/7 [00:07<00:02, 1.47s/it]
|
||
Loading checkpoint shards: 86%|████████▌ | 6/7 [00:08<00:01, 1.47s/it]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:09<00:00, 1.24s/it]
|
||
Loading checkpoint shards: 100%|██████████| 7/7 [00:09<00:00, 1.38s/it]
|
||
[INFO|modeling_utils.py:4926] 2026-04-10 17:21:09,066 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
|
||
|
||
[INFO|modeling_utils.py:4934] 2026-04-10 17:21:09,066 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758.
|
||
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
|
||
[INFO|configuration_utils.py:1095] 2026-04-10 17:21:09,069 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/generation_config.json
|
||
[INFO|configuration_utils.py:1142] 2026-04-10 17:21:09,070 >> Generate config GenerationConfig {
|
||
"bos_token_id": 128000,
|
||
"do_sample": true,
|
||
"eos_token_id": 128001,
|
||
"max_length": 4096,
|
||
"temperature": 0.6,
|
||
"top_p": 0.9
|
||
}
|
||
|
||
[WARNING|trainer.py:821] 2026-04-10 17:21:09,071 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:21:09,074 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
|
||
Tokenizing train (num_proc=12): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Tokenizing train (num_proc=12): 0%| | 128/43598 [00:36<3:26:34, 3.51 examples/s]
|
||
Tokenizing train (num_proc=12): 1%| | 256/43598 [00:36<1:25:10, 8.48 examples/s]
|
||
Tokenizing train (num_proc=12): 1%| | 384/43598 [00:36<46:25, 15.51 examples/s]
|
||
Tokenizing train (num_proc=12): 1%| | 512/43598 [00:36<28:16, 25.40 examples/s]
|
||
Tokenizing train (num_proc=12): 1%|▏ | 640/43598 [00:36<18:17, 39.13 examples/s]
|
||
Tokenizing train (num_proc=12): 2%|▏ | 768/43598 [00:37<12:15, 58.20 examples/s]
|
||
Tokenizing train (num_proc=12): 2%|▏ | 896/43598 [00:37<08:26, 84.23 examples/s]
|
||
Tokenizing train (num_proc=12): 2%|▏ | 1024/43598 [00:37<05:57, 119.20 examples/s]
|
||
Tokenizing train (num_proc=12): 3%|▎ | 1152/43598 [00:37<04:16, 165.21 examples/s]
|
||
Tokenizing train (num_proc=12): 3%|▎ | 1280/43598 [00:37<03:09, 223.51 examples/s]
|
||
Tokenizing train (num_proc=12): 3%|▎ | 1408/43598 [00:37<02:22, 296.37 examples/s]
|
||
Tokenizing train (num_proc=12): 4%|▎ | 1536/43598 [00:37<01:49, 382.93 examples/s]
|
||
Tokenizing train (num_proc=12): 4%|▍ | 1664/43598 [00:37<01:27, 477.75 examples/s]
|
||
Tokenizing train (num_proc=12): 4%|▍ | 1792/43598 [00:38<01:12, 574.18 examples/s]
|
||
Tokenizing train (num_proc=12): 4%|▍ | 1920/43598 [00:38<01:02, 662.25 examples/s]
|
||
Tokenizing train (num_proc=12): 5%|▍ | 2048/43598 [00:38<00:55, 746.77 examples/s]
|
||
Tokenizing train (num_proc=12): 5%|▍ | 2176/43598 [00:38<00:50, 815.26 examples/s]
|
||
Tokenizing train (num_proc=12): 5%|▌ | 2304/43598 [00:38<00:47, 870.37 examples/s]
|
||
Tokenizing train (num_proc=12): 6%|▌ | 2432/43598 [00:38<00:44, 923.25 examples/s]
|
||
Tokenizing train (num_proc=12): 6%|▌ | 2560/43598 [00:38<00:43, 948.19 examples/s]
|
||
Tokenizing train (num_proc=12): 6%|▌ | 2688/43598 [00:38<00:40, 1004.27 examples/s]
|
||
Tokenizing train (num_proc=12): 6%|▋ | 2816/43598 [00:38<00:40, 1013.07 examples/s]
|
||
Tokenizing train (num_proc=12): 7%|▋ | 2944/43598 [00:39<00:38, 1062.44 examples/s]
|
||
Tokenizing train (num_proc=12): 7%|▋ | 3072/43598 [00:39<00:38, 1050.02 examples/s]
|
||
Tokenizing train (num_proc=12): 7%|▋ | 3200/43598 [00:39<00:38, 1056.12 examples/s]
|
||
Tokenizing train (num_proc=12): 8%|▊ | 3328/43598 [00:39<00:38, 1041.71 examples/s]
|
||
Tokenizing train (num_proc=12): 8%|▊ | 3456/43598 [00:39<00:37, 1068.82 examples/s]
|
||
Tokenizing train (num_proc=12): 8%|▊ | 3584/43598 [00:39<00:36, 1095.95 examples/s]
|
||
Tokenizing train (num_proc=12): 8%|▊ | 3634/43598 [00:51<00:36, 1095.95 examples/s]
|
||
Tokenizing train (num_proc=12): 9%|▊ | 3762/43598 [01:05<36:28, 18.20 examples/s]
|
||
Tokenizing train (num_proc=12): 9%|▉ | 3890/43598 [01:05<26:26, 25.03 examples/s]
|
||
Tokenizing train (num_proc=12): 9%|▉ | 4018/43598 [01:05<19:01, 34.66 examples/s]
|
||
Tokenizing train (num_proc=12): 10%|▉ | 4146/43598 [01:05<13:39, 48.15 examples/s]
|
||
Tokenizing train (num_proc=12): 10%|▉ | 4274/43598 [01:06<09:47, 66.90 examples/s]
|
||
Tokenizing train (num_proc=12): 10%|█ | 4402/43598 [01:06<07:03, 92.59 examples/s]
|
||
Tokenizing train (num_proc=12): 10%|█ | 4530/43598 [01:06<05:07, 127.22 examples/s]
|
||
Tokenizing train (num_proc=12): 11%|█ | 4658/43598 [01:06<03:45, 172.84 examples/s]
|
||
Tokenizing train (num_proc=12): 11%|█ | 4786/43598 [01:06<02:48, 230.00 examples/s]
|
||
Tokenizing train (num_proc=12): 11%|█▏ | 4914/43598 [01:06<02:09, 297.88 examples/s]
|
||
Tokenizing train (num_proc=12): 12%|█▏ | 5042/43598 [01:06<01:41, 379.18 examples/s]
|
||
Tokenizing train (num_proc=12): 12%|█▏ | 5170/43598 [01:06<01:21, 472.68 examples/s]
|
||
Tokenizing train (num_proc=12): 12%|█▏ | 5298/43598 [01:06<01:07, 570.15 examples/s]
|
||
Tokenizing train (num_proc=12): 12%|█▏ | 5426/43598 [01:07<00:56, 680.50 examples/s]
|
||
Tokenizing train (num_proc=12): 13%|█▎ | 5554/43598 [01:07<00:50, 760.25 examples/s]
|
||
Tokenizing train (num_proc=12): 13%|█▎ | 5682/43598 [01:07<00:45, 827.79 examples/s]
|
||
Tokenizing train (num_proc=12): 13%|█▎ | 5810/43598 [01:07<00:42, 896.92 examples/s]
|
||
Tokenizing train (num_proc=12): 14%|█▎ | 5938/43598 [01:07<00:39, 946.95 examples/s]
|
||
Tokenizing train (num_proc=12): 14%|█▍ | 6066/43598 [01:07<00:37, 990.10 examples/s]
|
||
Tokenizing train (num_proc=12): 14%|█▍ | 6194/43598 [01:07<00:36, 1012.89 examples/s]
|
||
Tokenizing train (num_proc=12): 15%|█▍ | 6322/43598 [01:07<00:37, 1004.84 examples/s]
|
||
Tokenizing train (num_proc=12): 15%|█▍ | 6450/43598 [01:08<00:36, 1024.63 examples/s]
|
||
Tokenizing train (num_proc=12): 15%|█▌ | 6578/43598 [01:08<00:35, 1053.11 examples/s]
|
||
Tokenizing train (num_proc=12): 15%|█▌ | 6706/43598 [01:08<00:34, 1054.42 examples/s]
|
||
Tokenizing train (num_proc=12): 16%|█▌ | 6834/43598 [01:08<00:34, 1072.84 examples/s]
|
||
Tokenizing train (num_proc=12): 16%|█▌ | 6962/43598 [01:08<00:34, 1074.78 examples/s]
|
||
Tokenizing train (num_proc=12): 16%|█▋ | 7090/43598 [01:08<00:33, 1076.36 examples/s]
|
||
Tokenizing train (num_proc=12): 17%|█▋ | 7218/43598 [01:08<00:33, 1090.94 examples/s]
|
||
Tokenizing train (num_proc=12): 17%|█▋ | 7268/43598 [01:21<00:33, 1090.94 examples/s]
|
||
Tokenizing train (num_proc=12): 17%|█▋ | 7396/43598 [01:34<33:01, 18.27 examples/s]
|
||
Tokenizing train (num_proc=12): 18%|█▊ | 7652/43598 [01:34<18:45, 31.93 examples/s]
|
||
Tokenizing train (num_proc=12): 18%|█▊ | 7780/43598 [01:34<14:23, 41.47 examples/s]
|
||
Tokenizing train (num_proc=12): 18%|█▊ | 7908/43598 [01:34<10:50, 54.88 examples/s]
|
||
Tokenizing train (num_proc=12): 18%|█▊ | 8036/43598 [01:35<08:05, 73.26 examples/s]
|
||
Tokenizing train (num_proc=12): 19%|█▊ | 8164/43598 [01:35<06:00, 98.42 examples/s]
|
||
Tokenizing train (num_proc=12): 19%|█▉ | 8292/43598 [01:35<04:26, 132.57 examples/s]
|
||
Tokenizing train (num_proc=12): 19%|█▉ | 8420/43598 [01:35<03:19, 176.66 examples/s]
|
||
Tokenizing train (num_proc=12): 20%|█▉ | 8548/43598 [01:35<02:30, 232.30 examples/s]
|
||
Tokenizing train (num_proc=12): 20%|█▉ | 8676/43598 [01:35<01:55, 303.22 examples/s]
|
||
Tokenizing train (num_proc=12): 20%|██ | 8804/43598 [01:35<01:30, 383.17 examples/s]
|
||
Tokenizing train (num_proc=12): 20%|██ | 8932/43598 [01:35<01:12, 478.49 examples/s]
|
||
Tokenizing train (num_proc=12): 21%|██ | 9060/43598 [01:36<01:00, 570.22 examples/s]
|
||
Tokenizing train (num_proc=12): 21%|██ | 9188/43598 [01:36<00:52, 657.59 examples/s]
|
||
Tokenizing train (num_proc=12): 21%|██▏ | 9316/43598 [01:36<00:46, 742.34 examples/s]
|
||
Tokenizing train (num_proc=12): 22%|██▏ | 9444/43598 [01:36<00:41, 815.56 examples/s]
|
||
Tokenizing train (num_proc=12): 22%|██▏ | 9572/43598 [01:36<00:38, 875.28 examples/s]
|
||
Tokenizing train (num_proc=12): 22%|██▏ | 9700/43598 [01:36<00:36, 929.48 examples/s]
|
||
Tokenizing train (num_proc=12): 23%|██▎ | 9828/43598 [01:36<00:35, 948.52 examples/s]
|
||
Tokenizing train (num_proc=12): 23%|██▎ | 9956/43598 [01:36<00:34, 975.51 examples/s]
|
||
Tokenizing train (num_proc=12): 23%|██▎ | 10084/43598 [01:37<00:34, 979.16 examples/s]
|
||
Tokenizing train (num_proc=12): 23%|██▎ | 10212/43598 [01:37<00:33, 1000.21 examples/s]
|
||
Tokenizing train (num_proc=12): 24%|██▎ | 10340/43598 [01:37<00:32, 1037.78 examples/s]
|
||
Tokenizing train (num_proc=12): 24%|██▍ | 10468/43598 [01:37<00:30, 1078.51 examples/s]
|
||
Tokenizing train (num_proc=12): 24%|██▍ | 10596/43598 [01:37<00:30, 1091.49 examples/s]
|
||
Tokenizing train (num_proc=12): 25%|██▍ | 10724/43598 [01:37<00:29, 1114.59 examples/s]
|
||
Tokenizing train (num_proc=12): 25%|██▍ | 10852/43598 [01:37<00:29, 1097.86 examples/s]
|
||
Tokenizing train (num_proc=12): 25%|██▌ | 10901/43598 [01:52<00:29, 1097.86 examples/s]
|
||
Tokenizing train (num_proc=12): 25%|██▌ | 11029/43598 [02:03<29:45, 18.24 examples/s]
|
||
Tokenizing train (num_proc=12): 26%|██▌ | 11285/43598 [02:03<16:52, 31.92 examples/s]
|
||
Tokenizing train (num_proc=12): 26%|██▌ | 11413/43598 [02:03<12:56, 41.45 examples/s]
|
||
Tokenizing train (num_proc=12): 26%|██▋ | 11541/43598 [02:03<09:45, 54.76 examples/s]
|
||
Tokenizing train (num_proc=12): 27%|██▋ | 11669/43598 [02:04<07:16, 73.21 examples/s]
|
||
Tokenizing train (num_proc=12): 27%|██▋ | 11797/43598 [02:04<05:22, 98.47 examples/s]
|
||
Tokenizing train (num_proc=12): 27%|██▋ | 11925/43598 [02:04<03:59, 132.35 examples/s]
|
||
Tokenizing train (num_proc=12): 28%|██▊ | 12053/43598 [02:04<02:59, 175.79 examples/s]
|
||
Tokenizing train (num_proc=12): 28%|██▊ | 12181/43598 [02:04<02:14, 234.18 examples/s]
|
||
Tokenizing train (num_proc=12): 28%|██▊ | 12309/43598 [02:04<01:42, 304.46 examples/s]
|
||
Tokenizing train (num_proc=12): 29%|██▊ | 12437/43598 [02:04<01:20, 384.80 examples/s]
|
||
Tokenizing train (num_proc=12): 29%|██▉ | 12565/43598 [02:04<01:04, 481.30 examples/s]
|
||
Tokenizing train (num_proc=12): 29%|██▉ | 12693/43598 [02:05<00:52, 588.29 examples/s]
|
||
Tokenizing train (num_proc=12): 29%|██▉ | 12821/43598 [02:05<00:44, 686.98 examples/s]
|
||
Tokenizing train (num_proc=12): 30%|██▉ | 12949/43598 [02:05<00:39, 766.47 examples/s]
|
||
Tokenizing train (num_proc=12): 30%|██▉ | 13077/43598 [02:05<00:36, 847.25 examples/s]
|
||
Tokenizing train (num_proc=12): 30%|███ | 13205/43598 [02:05<00:33, 896.51 examples/s]
|
||
Tokenizing train (num_proc=12): 31%|███ | 13333/43598 [02:05<00:32, 941.01 examples/s]
|
||
Tokenizing train (num_proc=12): 31%|███ | 13461/43598 [02:05<00:30, 973.81 examples/s]
|
||
Tokenizing train (num_proc=12): 31%|███ | 13589/43598 [02:05<00:29, 1001.78 examples/s]
|
||
Tokenizing train (num_proc=12): 31%|███▏ | 13717/43598 [02:05<00:29, 1028.03 examples/s]
|
||
Tokenizing train (num_proc=12): 32%|███▏ | 13845/43598 [02:06<00:28, 1047.83 examples/s]
|
||
Tokenizing train (num_proc=12): 32%|███▏ | 13973/43598 [02:06<00:27, 1067.16 examples/s]
|
||
Tokenizing train (num_proc=12): 32%|███▏ | 14101/43598 [02:06<00:27, 1070.62 examples/s]
|
||
Tokenizing train (num_proc=12): 33%|███▎ | 14229/43598 [02:06<00:26, 1097.08 examples/s]
|
||
Tokenizing train (num_proc=12): 33%|███▎ | 14357/43598 [02:06<00:25, 1125.28 examples/s]
|
||
Tokenizing train (num_proc=12): 33%|███▎ | 14534/43598 [02:06<00:28, 1032.24 examples/s]
|
||
Tokenizing train (num_proc=12): 33%|███▎ | 14534/43598 [02:22<00:28, 1032.24 examples/s]
|
||
Tokenizing train (num_proc=12): 34%|███▎ | 14662/43598 [02:32<27:44, 17.39 examples/s]
|
||
Tokenizing train (num_proc=12): 34%|███▍ | 14790/43598 [02:33<19:52, 24.15 examples/s]
|
||
Tokenizing train (num_proc=12): 34%|███▍ | 14918/43598 [02:33<14:12, 33.65 examples/s]
|
||
Tokenizing train (num_proc=12): 35%|███▍ | 15046/43598 [02:33<10:07, 46.96 examples/s]
|
||
Tokenizing train (num_proc=12): 35%|███▍ | 15174/43598 [02:33<07:14, 65.43 examples/s]
|
||
Tokenizing train (num_proc=12): 35%|███▌ | 15302/43598 [02:33<05:11, 90.82 examples/s]
|
||
Tokenizing train (num_proc=12): 35%|███▌ | 15430/43598 [02:33<03:45, 124.93 examples/s]
|
||
Tokenizing train (num_proc=12): 36%|███▌ | 15558/43598 [02:33<02:45, 169.55 examples/s]
|
||
Tokenizing train (num_proc=12): 36%|███▌ | 15686/43598 [02:33<02:02, 227.23 examples/s]
|
||
Tokenizing train (num_proc=12): 36%|███▋ | 15814/43598 [02:33<01:33, 298.41 examples/s]
|
||
Tokenizing train (num_proc=12): 37%|███▋ | 15942/43598 [02:34<01:12, 379.94 examples/s]
|
||
Tokenizing train (num_proc=12): 37%|███▋ | 16070/43598 [02:34<00:58, 471.57 examples/s]
|
||
Tokenizing train (num_proc=12): 37%|███▋ | 16198/43598 [02:34<00:48, 562.07 examples/s]
|
||
Tokenizing train (num_proc=12): 37%|███▋ | 16326/43598 [02:34<00:41, 662.92 examples/s]
|
||
Tokenizing train (num_proc=12): 38%|███▊ | 16454/43598 [02:34<00:35, 758.42 examples/s]
|
||
Tokenizing train (num_proc=12): 38%|███▊ | 16582/43598 [02:34<00:32, 833.59 examples/s]
|
||
Tokenizing train (num_proc=12): 38%|███▊ | 16710/43598 [02:34<00:30, 882.52 examples/s]
|
||
Tokenizing train (num_proc=12): 39%|███▊ | 16838/43598 [02:34<00:28, 931.22 examples/s]
|
||
Tokenizing train (num_proc=12): 39%|███▉ | 16966/43598 [02:34<00:27, 986.24 examples/s]
|
||
Tokenizing train (num_proc=12): 39%|███▉ | 17094/43598 [02:35<00:26, 990.45 examples/s]
|
||
Tokenizing train (num_proc=12): 40%|███▉ | 17222/43598 [02:35<00:26, 1012.15 examples/s]
|
||
Tokenizing train (num_proc=12): 40%|███▉ | 17350/43598 [02:35<00:25, 1030.21 examples/s]
|
||
Tokenizing train (num_proc=12): 40%|████ | 17478/43598 [02:35<00:24, 1046.13 examples/s]
|
||
Tokenizing train (num_proc=12): 40%|████ | 17606/43598 [02:35<00:24, 1074.54 examples/s]
|
||
Tokenizing train (num_proc=12): 41%|████ | 17734/43598 [02:35<00:23, 1079.87 examples/s]
|
||
Tokenizing train (num_proc=12): 41%|████ | 17862/43598 [02:35<00:22, 1121.13 examples/s]
|
||
Tokenizing train (num_proc=12): 41%|████▏ | 17990/43598 [02:35<00:23, 1108.71 examples/s]
|
||
Tokenizing train (num_proc=12): 42%|████▏ | 18118/43598 [02:36<00:22, 1123.64 examples/s]
|
||
Tokenizing train (num_proc=12): 42%|████▏ | 18167/43598 [02:52<00:22, 1123.64 examples/s]
|
||
Tokenizing train (num_proc=12): 42%|████▏ | 18295/43598 [03:02<23:48, 17.72 examples/s]
|
||
Tokenizing train (num_proc=12): 43%|████▎ | 18551/43598 [03:02<13:28, 31.00 examples/s]
|
||
Tokenizing train (num_proc=12): 43%|████▎ | 18679/43598 [03:02<10:18, 40.30 examples/s]
|
||
Tokenizing train (num_proc=12): 43%|████▎ | 18807/43598 [03:03<07:45, 53.31 examples/s]
|
||
Tokenizing train (num_proc=12): 43%|████▎ | 18935/43598 [03:03<05:45, 71.38 examples/s]
|
||
Tokenizing train (num_proc=12): 44%|████▎ | 19063/43598 [03:03<04:16, 95.80 examples/s]
|
||
Tokenizing train (num_proc=12): 44%|████▍ | 19191/43598 [03:03<03:08, 129.51 examples/s]
|
||
Tokenizing train (num_proc=12): 44%|████▍ | 19319/43598 [03:03<02:20, 172.98 examples/s]
|
||
Tokenizing train (num_proc=12): 45%|████▍ | 19447/43598 [03:03<01:45, 228.71 examples/s]
|
||
Tokenizing train (num_proc=12): 45%|████▍ | 19575/43598 [03:03<01:20, 297.04 examples/s]
|
||
Tokenizing train (num_proc=12): 45%|████▌ | 19703/43598 [03:03<01:02, 379.85 examples/s]
|
||
Tokenizing train (num_proc=12): 45%|████▌ | 19831/43598 [03:03<00:50, 466.41 examples/s]
|
||
Tokenizing train (num_proc=12): 46%|████▌ | 19959/43598 [03:04<00:42, 557.32 examples/s]
|
||
Tokenizing train (num_proc=12): 46%|████▌ | 20087/43598 [03:04<00:36, 644.77 examples/s]
|
||
Tokenizing train (num_proc=12): 46%|████▋ | 20215/43598 [03:04<00:32, 720.26 examples/s]
|
||
Tokenizing train (num_proc=12): 47%|████▋ | 20343/43598 [03:04<00:29, 795.96 examples/s]
|
||
Tokenizing train (num_proc=12): 47%|████▋ | 20471/43598 [03:04<00:26, 889.36 examples/s]
|
||
Tokenizing train (num_proc=12): 47%|████▋ | 20599/43598 [03:04<00:25, 917.91 examples/s]
|
||
Tokenizing train (num_proc=12): 48%|████▊ | 20727/43598 [03:04<00:24, 938.69 examples/s]
|
||
Tokenizing train (num_proc=12): 48%|████▊ | 20855/43598 [03:04<00:24, 942.89 examples/s]
|
||
Tokenizing train (num_proc=12): 48%|████▊ | 20983/43598 [03:05<00:22, 984.40 examples/s]
|
||
Tokenizing train (num_proc=12): 48%|████▊ | 21111/43598 [03:05<00:21, 1033.17 examples/s]
|
||
Tokenizing train (num_proc=12): 49%|████▊ | 21239/43598 [03:05<00:20, 1070.43 examples/s]
|
||
Tokenizing train (num_proc=12): 49%|████▉ | 21367/43598 [03:05<00:19, 1112.91 examples/s]
|
||
Tokenizing train (num_proc=12): 49%|████▉ | 21495/43598 [03:05<00:19, 1123.82 examples/s]
|
||
Tokenizing train (num_proc=12): 50%|████▉ | 21623/43598 [03:05<00:19, 1117.60 examples/s]
|
||
Tokenizing train (num_proc=12): 50%|████▉ | 21751/43598 [03:05<00:19, 1104.00 examples/s]
|
||
Tokenizing train (num_proc=12): 50%|█████ | 21800/43598 [03:22<00:19, 1104.00 examples/s]
|
||
Tokenizing train (num_proc=12): 50%|█████ | 21928/43598 [03:31<20:04, 17.99 examples/s]
|
||
Tokenizing train (num_proc=12): 51%|█████ | 22184/43598 [03:32<11:19, 31.49 examples/s]
|
||
Tokenizing train (num_proc=12): 51%|█████ | 22312/43598 [03:32<08:40, 40.93 examples/s]
|
||
Tokenizing train (num_proc=12): 51%|█████▏ | 22440/43598 [03:32<06:30, 54.12 examples/s]
|
||
Tokenizing train (num_proc=12): 52%|█████▏ | 22568/43598 [03:32<04:50, 72.33 examples/s]
|
||
Tokenizing train (num_proc=12): 52%|█████▏ | 22696/43598 [03:32<03:34, 97.47 examples/s]
|
||
Tokenizing train (num_proc=12): 52%|█████▏ | 22824/43598 [03:32<02:38, 130.98 examples/s]
|
||
Tokenizing train (num_proc=12): 53%|█████▎ | 22952/43598 [03:32<01:58, 174.95 examples/s]
|
||
Tokenizing train (num_proc=12): 53%|█████▎ | 23080/43598 [03:32<01:28, 230.74 examples/s]
|
||
Tokenizing train (num_proc=12): 53%|█████▎ | 23208/43598 [03:33<01:07, 300.33 examples/s]
|
||
Tokenizing train (num_proc=12): 54%|█████▎ | 23336/43598 [03:33<00:53, 381.89 examples/s]
|
||
Tokenizing train (num_proc=12): 54%|█████▍ | 23464/43598 [03:33<00:42, 471.84 examples/s]
|
||
Tokenizing train (num_proc=12): 54%|█████▍ | 23592/43598 [03:33<00:35, 562.94 examples/s]
|
||
Tokenizing train (num_proc=12): 54%|█████▍ | 23720/43598 [03:33<00:30, 649.78 examples/s]
|
||
Tokenizing train (num_proc=12): 55%|█████▍ | 23848/43598 [03:33<00:26, 735.64 examples/s]
|
||
Tokenizing train (num_proc=12): 55%|█████▍ | 23976/43598 [03:33<00:24, 809.43 examples/s]
|
||
Tokenizing train (num_proc=12): 55%|█████▌ | 24104/43598 [03:33<00:22, 878.30 examples/s]
|
||
Tokenizing train (num_proc=12): 56%|█████▌ | 24232/43598 [03:34<00:20, 924.58 examples/s]
|
||
Tokenizing train (num_proc=12): 56%|█████▌ | 24360/43598 [03:34<00:19, 963.17 examples/s]
|
||
Tokenizing train (num_proc=12): 56%|█████▌ | 24488/43598 [03:34<00:18, 1009.62 examples/s]
|
||
Tokenizing train (num_proc=12): 56%|█████▋ | 24616/43598 [03:34<00:18, 1037.46 examples/s]
|
||
Tokenizing train (num_proc=12): 57%|█████▋ | 24744/43598 [03:34<00:17, 1064.13 examples/s]
|
||
Tokenizing train (num_proc=12): 57%|█████▋ | 24872/43598 [03:34<00:17, 1084.62 examples/s]
|
||
Tokenizing train (num_proc=12): 57%|█████▋ | 25000/43598 [03:34<00:16, 1115.84 examples/s]
|
||
Tokenizing train (num_proc=12): 58%|█████▊ | 25128/43598 [03:34<00:16, 1098.17 examples/s]
|
||
Tokenizing train (num_proc=12): 58%|█████▊ | 25256/43598 [03:34<00:16, 1089.67 examples/s]
|
||
Tokenizing train (num_proc=12): 58%|█████▊ | 25384/43598 [03:35<00:17, 1062.73 examples/s]
|
||
Tokenizing train (num_proc=12): 58%|█████▊ | 25433/43598 [03:52<00:17, 1062.73 examples/s]
|
||
Tokenizing train (num_proc=12): 59%|█████▊ | 25561/43598 [04:01<17:09, 17.52 examples/s]
|
||
Tokenizing train (num_proc=12): 59%|█████▉ | 25817/43598 [04:02<09:40, 30.65 examples/s]
|
||
Tokenizing train (num_proc=12): 60%|█████▉ | 25945/43598 [04:02<07:22, 39.87 examples/s]
|
||
Tokenizing train (num_proc=12): 60%|█████▉ | 26073/43598 [04:02<05:32, 52.76 examples/s]
|
||
Tokenizing train (num_proc=12): 60%|██████ | 26201/43598 [04:02<04:06, 70.67 examples/s]
|
||
Tokenizing train (num_proc=12): 60%|██████ | 26329/43598 [04:02<03:01, 95.26 examples/s]
|
||
Tokenizing train (num_proc=12): 61%|██████ | 26457/43598 [04:02<02:13, 128.57 examples/s]
|
||
Tokenizing train (num_proc=12): 61%|██████ | 26585/43598 [04:02<01:38, 172.56 examples/s]
|
||
Tokenizing train (num_proc=12): 61%|██████▏ | 26713/43598 [04:02<01:13, 228.41 examples/s]
|
||
Tokenizing train (num_proc=12): 62%|██████▏ | 26841/43598 [04:03<00:56, 294.84 examples/s]
|
||
Tokenizing train (num_proc=12): 62%|██████▏ | 26969/43598 [04:03<00:44, 375.95 examples/s]
|
||
Tokenizing train (num_proc=12): 62%|██████▏ | 27097/43598 [04:03<00:35, 464.00 examples/s]
|
||
Tokenizing train (num_proc=12): 62%|██████▏ | 27225/43598 [04:03<00:29, 559.27 examples/s]
|
||
Tokenizing train (num_proc=12): 63%|██████▎ | 27353/43598 [04:03<00:24, 653.89 examples/s]
|
||
Tokenizing train (num_proc=12): 63%|██████▎ | 27481/43598 [04:03<00:21, 737.73 examples/s]
|
||
Tokenizing train (num_proc=12): 63%|██████▎ | 27609/43598 [04:03<00:19, 808.47 examples/s]
|
||
Tokenizing train (num_proc=12): 64%|██████▎ | 27737/43598 [04:03<00:18, 874.77 examples/s]
|
||
Tokenizing train (num_proc=12): 64%|██████▍ | 27865/43598 [04:04<00:17, 905.08 examples/s]
|
||
Tokenizing train (num_proc=12): 64%|██████▍ | 27993/43598 [04:04<00:16, 940.43 examples/s]
|
||
Tokenizing train (num_proc=12): 65%|██████▍ | 28121/43598 [04:04<00:16, 933.48 examples/s]
|
||
Tokenizing train (num_proc=12): 65%|██████▍ | 28249/43598 [04:04<00:15, 966.63 examples/s]
|
||
Tokenizing train (num_proc=12): 65%|██████▌ | 28377/43598 [04:04<00:14, 1014.79 examples/s]
|
||
Tokenizing train (num_proc=12): 65%|██████▌ | 28505/43598 [04:04<00:14, 1056.34 examples/s]
|
||
Tokenizing train (num_proc=12): 66%|██████▌ | 28633/43598 [04:04<00:13, 1107.64 examples/s]
|
||
Tokenizing train (num_proc=12): 66%|██████▌ | 28761/43598 [04:04<00:13, 1119.41 examples/s]
|
||
Tokenizing train (num_proc=12): 66%|██████▋ | 28889/43598 [04:04<00:13, 1097.43 examples/s]
|
||
Tokenizing train (num_proc=12): 67%|██████▋ | 29017/43598 [04:05<00:13, 1077.16 examples/s]
|
||
Tokenizing train (num_proc=12): 67%|██████▋ | 29066/43598 [04:22<00:13, 1077.16 examples/s]
|
||
Tokenizing train (num_proc=12): 67%|██████▋ | 29194/43598 [04:31<13:28, 17.82 examples/s]
|
||
Tokenizing train (num_proc=12): 67%|██████▋ | 29322/43598 [04:31<09:42, 24.53 examples/s]
|
||
Tokenizing train (num_proc=12): 68%|██████▊ | 29450/43598 [04:31<06:56, 33.95 examples/s]
|
||
Tokenizing train (num_proc=12): 68%|██████▊ | 29578/43598 [04:31<04:57, 47.13 examples/s]
|
||
Tokenizing train (num_proc=12): 68%|██████▊ | 29706/43598 [04:31<03:32, 65.48 examples/s]
|
||
Tokenizing train (num_proc=12): 68%|██████▊ | 29834/43598 [04:32<02:31, 90.65 examples/s]
|
||
Tokenizing train (num_proc=12): 69%|██████▊ | 29962/43598 [04:32<01:49, 124.46 examples/s]
|
||
Tokenizing train (num_proc=12): 69%|██████▉ | 30090/43598 [04:32<01:19, 169.21 examples/s]
|
||
Tokenizing train (num_proc=12): 69%|██████▉ | 30218/43598 [04:32<00:59, 225.79 examples/s]
|
||
Tokenizing train (num_proc=12): 70%|██████▉ | 30346/43598 [04:32<00:44, 298.15 examples/s]
|
||
Tokenizing train (num_proc=12): 70%|██████▉ | 30474/43598 [04:32<00:34, 377.94 examples/s]
|
||
Tokenizing train (num_proc=12): 70%|███████ | 30602/43598 [04:32<00:27, 468.33 examples/s]
|
||
Tokenizing train (num_proc=12): 70%|███████ | 30730/43598 [04:32<00:22, 568.17 examples/s]
|
||
Tokenizing train (num_proc=12): 71%|███████ | 30858/43598 [04:33<00:19, 662.05 examples/s]
|
||
Tokenizing train (num_proc=12): 71%|███████ | 30986/43598 [04:33<00:16, 750.26 examples/s]
|
||
Tokenizing train (num_proc=12): 71%|███████▏ | 31114/43598 [04:33<00:15, 818.61 examples/s]
|
||
Tokenizing train (num_proc=12): 72%|███████▏ | 31242/43598 [04:33<00:14, 877.37 examples/s]
|
||
Tokenizing train (num_proc=12): 72%|███████▏ | 31370/43598 [04:33<00:13, 932.78 examples/s]
|
||
Tokenizing train (num_proc=12): 72%|███████▏ | 31498/43598 [04:33<00:12, 972.60 examples/s]
|
||
Tokenizing train (num_proc=12): 73%|███████▎ | 31626/43598 [04:33<00:12, 980.15 examples/s]
|
||
Tokenizing train (num_proc=12): 73%|███████▎ | 31754/43598 [04:33<00:11, 1025.13 examples/s]
|
||
Tokenizing train (num_proc=12): 73%|███████▎ | 31882/43598 [04:33<00:11, 1024.90 examples/s]
|
||
Tokenizing train (num_proc=12): 73%|███████▎ | 32010/43598 [04:34<00:11, 1046.67 examples/s]
|
||
Tokenizing train (num_proc=12): 74%|███████▎ | 32138/43598 [04:34<00:10, 1071.84 examples/s]
|
||
Tokenizing train (num_proc=12): 74%|███████▍ | 32266/43598 [04:34<00:10, 1052.02 examples/s]
|
||
Tokenizing train (num_proc=12): 74%|███████▍ | 32394/43598 [04:34<00:10, 1104.90 examples/s]
|
||
Tokenizing train (num_proc=12): 75%|███████▍ | 32522/43598 [04:34<00:10, 1080.84 examples/s]
|
||
Tokenizing train (num_proc=12): 75%|███████▍ | 32650/43598 [04:34<00:10, 1049.71 examples/s]
|
||
Tokenizing train (num_proc=12): 75%|███████▌ | 32699/43598 [04:52<00:10, 1049.71 examples/s]
|
||
Tokenizing train (num_proc=12): 75%|███████▌ | 32827/43598 [05:01<10:19, 17.40 examples/s]
|
||
Tokenizing train (num_proc=12): 76%|███████▌ | 32955/43598 [05:01<07:24, 23.93 examples/s]
|
||
Tokenizing train (num_proc=12): 76%|███████▌ | 33083/43598 [05:01<05:17, 33.14 examples/s]
|
||
Tokenizing train (num_proc=12): 76%|███████▌ | 33211/43598 [05:02<03:45, 46.03 examples/s]
|
||
Tokenizing train (num_proc=12): 76%|███████▋ | 33339/43598 [05:02<02:40, 63.88 examples/s]
|
||
Tokenizing train (num_proc=12): 77%|███████▋ | 33467/43598 [05:02<01:54, 88.36 examples/s]
|
||
Tokenizing train (num_proc=12): 77%|███████▋ | 33595/43598 [05:02<01:22, 121.51 examples/s]
|
||
Tokenizing train (num_proc=12): 77%|███████▋ | 33723/43598 [05:02<00:59, 165.11 examples/s]
|
||
Tokenizing train (num_proc=12): 78%|███████▊ | 33851/43598 [05:02<00:44, 219.94 examples/s]
|
||
Tokenizing train (num_proc=12): 78%|███████▊ | 33979/43598 [05:02<00:33, 288.03 examples/s]
|
||
Tokenizing train (num_proc=12): 78%|███████▊ | 34107/43598 [05:02<00:25, 369.02 examples/s]
|
||
Tokenizing train (num_proc=12): 79%|███████▊ | 34235/43598 [05:03<00:20, 462.81 examples/s]
|
||
Tokenizing train (num_proc=12): 79%|███████▉ | 34363/43598 [05:03<00:16, 553.63 examples/s]
|
||
Tokenizing train (num_proc=12): 79%|███████▉ | 34491/43598 [05:03<00:14, 650.29 examples/s]
|
||
Tokenizing train (num_proc=12): 79%|███████▉ | 34619/43598 [05:03<00:12, 725.15 examples/s]
|
||
Tokenizing train (num_proc=12): 80%|███████▉ | 34747/43598 [05:03<00:10, 811.79 examples/s]
|
||
Tokenizing train (num_proc=12): 80%|███████▉ | 34875/43598 [05:03<00:10, 858.37 examples/s]
|
||
Tokenizing train (num_proc=12): 80%|████████ | 35003/43598 [05:03<00:09, 922.39 examples/s]
|
||
Tokenizing train (num_proc=12): 81%|████████ | 35131/43598 [05:03<00:08, 972.27 examples/s]
|
||
Tokenizing train (num_proc=12): 81%|████████ | 35259/43598 [05:04<00:08, 984.05 examples/s]
|
||
Tokenizing train (num_proc=12): 81%|████████ | 35387/43598 [05:04<00:08, 1017.58 examples/s]
|
||
Tokenizing train (num_proc=12): 81%|████████▏ | 35515/43598 [05:04<00:07, 1037.76 examples/s]
|
||
Tokenizing train (num_proc=12): 82%|████████▏ | 35643/43598 [05:04<00:07, 1041.62 examples/s]
|
||
Tokenizing train (num_proc=12): 82%|████████▏ | 35771/43598 [05:04<00:07, 1073.37 examples/s]
|
||
Tokenizing train (num_proc=12): 82%|████████▏ | 35899/43598 [05:04<00:07, 1066.92 examples/s]
|
||
Tokenizing train (num_proc=12): 83%|████████▎ | 36027/43598 [05:04<00:07, 1070.25 examples/s]
|
||
Tokenizing train (num_proc=12): 83%|████████▎ | 36155/43598 [05:04<00:06, 1108.16 examples/s]
|
||
Tokenizing train (num_proc=12): 83%|████████▎ | 36283/43598 [05:04<00:06, 1130.68 examples/s]
|
||
Tokenizing train (num_proc=12): 83%|████████▎ | 36332/43598 [05:22<00:06, 1130.68 examples/s]
|
||
Tokenizing train (num_proc=12): 84%|████████▎ | 36460/43598 [05:30<06:34, 18.08 examples/s]
|
||
Tokenizing train (num_proc=12): 84%|████████▍ | 36716/43598 [05:31<03:37, 31.66 examples/s]
|
||
Tokenizing train (num_proc=12): 85%|████████▍ | 36844/43598 [05:31<02:44, 41.14 examples/s]
|
||
Tokenizing train (num_proc=12): 85%|████████▍ | 36972/43598 [05:31<02:01, 54.43 examples/s]
|
||
Tokenizing train (num_proc=12): 85%|████████▌ | 37100/43598 [05:31<01:29, 72.80 examples/s]
|
||
Tokenizing train (num_proc=12): 85%|████████▌ | 37228/43598 [05:31<01:04, 98.11 examples/s]
|
||
Tokenizing train (num_proc=12): 86%|████████▌ | 37356/43598 [05:31<00:47, 131.68 examples/s]
|
||
Tokenizing train (num_proc=12): 86%|████████▌ | 37484/43598 [05:31<00:34, 176.45 examples/s]
|
||
Tokenizing train (num_proc=12): 86%|████████▋ | 37612/43598 [05:31<00:25, 232.55 examples/s]
|
||
Tokenizing train (num_proc=12): 87%|████████▋ | 37740/43598 [05:32<00:19, 302.38 examples/s]
|
||
Tokenizing train (num_proc=12): 87%|████████▋ | 37868/43598 [05:32<00:14, 382.37 examples/s]
|
||
Tokenizing train (num_proc=12): 87%|████████▋ | 37996/43598 [05:32<00:11, 467.26 examples/s]
|
||
Tokenizing train (num_proc=12): 87%|████████▋ | 38124/43598 [05:32<00:09, 568.09 examples/s]
|
||
Tokenizing train (num_proc=12): 88%|████████▊ | 38252/43598 [05:32<00:08, 648.08 examples/s]
|
||
Tokenizing train (num_proc=12): 88%|████████▊ | 38380/43598 [05:32<00:07, 731.53 examples/s]
|
||
Tokenizing train (num_proc=12): 88%|████████▊ | 38508/43598 [05:32<00:06, 801.11 examples/s]
|
||
Tokenizing train (num_proc=12): 89%|████████▊ | 38636/43598 [05:32<00:05, 887.69 examples/s]
|
||
Tokenizing train (num_proc=12): 89%|████████▉ | 38764/43598 [05:33<00:05, 925.57 examples/s]
|
||
Tokenizing train (num_proc=12): 89%|████████▉ | 38892/43598 [05:33<00:04, 968.09 examples/s]
|
||
Tokenizing train (num_proc=12): 89%|████████▉ | 39020/43598 [05:33<00:04, 972.18 examples/s]
|
||
Tokenizing train (num_proc=12): 90%|████████▉ | 39148/43598 [05:33<00:04, 996.27 examples/s]
|
||
Tokenizing train (num_proc=12): 90%|█████████ | 39276/43598 [05:33<00:04, 1030.55 examples/s]
|
||
Tokenizing train (num_proc=12): 90%|█████████ | 39404/43598 [05:33<00:03, 1057.77 examples/s]
|
||
Tokenizing train (num_proc=12): 91%|█████████ | 39532/43598 [05:33<00:03, 1073.26 examples/s]
|
||
Tokenizing train (num_proc=12): 91%|█████████ | 39660/43598 [05:33<00:03, 1043.11 examples/s]
|
||
Tokenizing train (num_proc=12): 91%|█████████▏| 39788/43598 [05:34<00:03, 1101.34 examples/s]
|
||
Tokenizing train (num_proc=12): 92%|█████████▏| 39916/43598 [05:34<00:03, 1083.22 examples/s]
|
||
Tokenizing train (num_proc=12): 92%|█████████▏| 39965/43598 [05:52<00:03, 1083.22 examples/s]
|
||
Tokenizing train (num_proc=12): 92%|█████████▏| 40093/43598 [05:56<02:48, 20.74 examples/s]
|
||
Tokenizing train (num_proc=12): 92%|█████████▏| 40221/43598 [05:56<01:58, 28.52 examples/s]
|
||
Tokenizing train (num_proc=12): 93%|█████████▎| 40349/43598 [05:57<01:22, 39.47 examples/s]
|
||
Tokenizing train (num_proc=12): 93%|█████████▎| 40477/43598 [05:57<00:57, 54.73 examples/s]
|
||
Tokenizing train (num_proc=12): 93%|█████████▎| 40605/43598 [05:57<00:39, 75.86 examples/s]
|
||
Tokenizing train (num_proc=12): 93%|█████████▎| 40733/43598 [05:57<00:27, 104.74 examples/s]
|
||
Tokenizing train (num_proc=12): 94%|█████████▍| 40989/43598 [05:57<00:14, 180.04 examples/s]
|
||
Tokenizing train (num_proc=12): 94%|█████████▍| 41117/43598 [05:57<00:10, 227.18 examples/s]
|
||
Tokenizing train (num_proc=12): 95%|█████████▍| 41245/43598 [05:57<00:08, 285.77 examples/s]
|
||
Tokenizing train (num_proc=12): 95%|█████████▍| 41373/43598 [05:57<00:06, 359.93 examples/s]
|
||
Tokenizing train (num_proc=12): 95%|█████████▌| 41501/43598 [05:57<00:04, 446.74 examples/s]
|
||
Tokenizing train (num_proc=12): 95%|█████████▌| 41629/43598 [05:58<00:03, 542.58 examples/s]
|
||
Tokenizing train (num_proc=12): 96%|█████████▌| 41757/43598 [05:58<00:02, 643.05 examples/s]
|
||
Tokenizing train (num_proc=12): 96%|█████████▌| 41885/43598 [05:58<00:02, 746.92 examples/s]
|
||
Tokenizing train (num_proc=12): 96%|█████████▋| 42013/43598 [05:58<00:01, 839.68 examples/s]
|
||
Tokenizing train (num_proc=12): 97%|█████████▋| 42141/43598 [05:58<00:01, 915.45 examples/s]
|
||
Tokenizing train (num_proc=12): 97%|█████████▋| 42269/43598 [05:58<00:01, 983.80 examples/s]
|
||
Tokenizing train (num_proc=12): 98%|█████████▊| 42525/43598 [05:58<00:00, 1083.80 examples/s]
|
||
Tokenizing train (num_proc=12): 98%|█████████▊| 42653/43598 [05:58<00:00, 1106.12 examples/s]
|
||
Tokenizing train (num_proc=12): 98%|█████████▊| 42781/43598 [05:59<00:00, 1134.68 examples/s]
|
||
Tokenizing train (num_proc=12): 98%|█████████▊| 42909/43598 [05:59<00:00, 1158.98 examples/s]
|
||
Tokenizing train (num_proc=12): 99%|█████████▊| 43037/43598 [05:59<00:00, 1167.18 examples/s]
|
||
Tokenizing train (num_proc=12): 99%|█████████▉| 43293/43598 [05:59<00:00, 1256.29 examples/s]
|
||
Tokenizing train (num_proc=12): 100%|█████████▉| 43421/43598 [05:59<00:00, 1225.32 examples/s]
|
||
Tokenizing train (num_proc=12): 100%|█████████▉| 43549/43598 [05:59<00:00, 1232.31 examples/s]Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfsda5f551da15d52da00001c98'
|
||
|
||
Tokenizing train (num_proc=12): 100%|██████████| 43598/43598 [05:59<00:00, 121.11 examples/s]
|
||
[WARNING|trainer.py:816] 2026-04-10 17:27:57,834 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
|
||
Saving the dataset (0/2 shards): 0%| | 0/43598 [00:00<?, ? examples/s]
|
||
Saving the dataset (0/2 shards): 18%|█▊ | 8000/43598 [00:00<00:00, 63762.12 examples/s]
|
||
Saving the dataset (0/2 shards): 39%|███▉ | 17000/43598 [00:00<00:00, 69974.98 examples/s]
|
||
Saving the dataset (1/2 shards): 50%|█████ | 21799/43598 [00:00<00:00, 69974.98 examples/s]
|
||
Saving the dataset (1/2 shards): 57%|█████▋ | 24799/43598 [00:00<00:00, 34603.85 examples/s]
|
||
Saving the dataset (1/2 shards): 78%|███████▊ | 33799/43598 [00:00<00:00, 44932.83 examples/s]
|
||
Saving the dataset (1/2 shards): 91%|█████████▏| 39799/43598 [00:00<00:00, 47763.02 examples/s]
|
||
Saving the dataset (2/2 shards): 100%|██████████| 43598/43598 [00:01<00:00, 47763.02 examples/s]
|
||
Saving the dataset (2/2 shards): 100%|██████████| 43598/43598 [00:01<00:00, 39385.00 examples/s]
|
||
[WARNING|trainer.py:816] 2026-04-10 17:27:59,772 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
|
||
Tokenizing test (num_proc=12): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Tokenizing test (num_proc=12): 5%|▌ | 128/2339 [00:39<11:27, 3.21 examples/s]
|
||
Tokenizing test (num_proc=12): 14%|█▍ | 323/2339 [01:12<07:10, 4.69 examples/s]
|
||
Tokenizing test (num_proc=12): 22%|██▏ | 518/2339 [01:45<05:46, 5.26 examples/s]
|
||
Tokenizing test (num_proc=12): 30%|███ | 713/2339 [02:18<04:56, 5.49 examples/s]
|
||
Tokenizing test (num_proc=12): 39%|███▉ | 908/2339 [02:51<04:12, 5.66 examples/s]
|
||
Tokenizing test (num_proc=12): 47%|████▋ | 1103/2339 [03:23<03:34, 5.76 examples/s]
|
||
Tokenizing test (num_proc=12): 55%|█████▌ | 1298/2339 [03:56<02:58, 5.83 examples/s]
|
||
Tokenizing test (num_proc=12): 64%|██████▍ | 1493/2339 [04:29<02:24, 5.86 examples/s]
|
||
Tokenizing test (num_proc=12): 72%|███████▏ | 1688/2339 [05:02<01:51, 5.85 examples/s]
|
||
Tokenizing test (num_proc=12): 81%|████████ | 1883/2339 [05:36<01:17, 5.86 examples/s]
|
||
Tokenizing test (num_proc=12): 89%|████████▉ | 2078/2339 [06:09<00:44, 5.87 examples/s]
|
||
Tokenizing test (num_proc=12): 97%|█████████▋| 2273/2339 [06:38<00:10, 6.09 examples/s]Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
||
self.run()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
||
self._target(*self._args, **self._kwargs)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
||
server.serve_forever()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
||
sys.exit(0)
|
||
SystemExit: 0
|
||
|
||
During handling of the above exception, another exception occurred:
|
||
|
||
Traceback (most recent call last):
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
||
finalizer()
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
||
res = self._callback(*self._args, **self._kwargs)
|
||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
||
rmtree(tempdir)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
||
_rmtree_safe_fd(fd, path, onerror)
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
||
onerror(os.unlink, fullname, sys.exc_info())
|
||
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
||
os.unlink(entry.name, dir_fd=topfd)
|
||
OSError: [Errno 16] Device or resource busy: '.nfs6c9aa8faaa30bdd000001c99'
|
||
|
||
Tokenizing test (num_proc=12): 100%|██████████| 2339/2339 [06:38<00:00, 5.87 examples/s]
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:25,272 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
|
||
Saving the dataset (0/1 shards): 0%| | 0/2339 [00:00<?, ? examples/s]
|
||
Saving the dataset (1/1 shards): 100%|██████████| 2339/2339 [00:00<00:00, 25844.59 examples/s]
|
||
Saving the dataset (1/1 shards): 100%|██████████| 2339/2339 [00:00<00:00, 25802.38 examples/s]
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:518: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead.
|
||
super().__init__(
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,156 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,157 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,158 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,158 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,159 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,159 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,160 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,457 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,457 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,457 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,457 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,458 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,458 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,458 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,458 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,459 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,459 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,459 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,459 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,459 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,460 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,503 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,503 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,503 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:518: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead.
|
||
super().__init__(
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:518: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead.
|
||
super().__init__(
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:518: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead.
|
||
super().__init__(
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,503 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,503 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,503 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
[WARNING|trainer.py:816] 2026-04-10 17:35:28,503 >> Trainer.tokenizer is now deprecated. You should use Trainer.processing_class instead.
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:518: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead.
|
||
super().__init__(
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:518: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead.
|
||
super().__init__(
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:518: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead.
|
||
super().__init__(
|
||
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:518: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `MarginDPOTrainer.__init__`. Use `processing_class` instead.
|
||
super().__init__(
|
||
[INFO|trainer.py:748] 2026-04-10 17:35:28,762 >> Using auto half precision backend
|
||
/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaForCausalLM because mixed precision turned on in FSDP. Affects: model.embed_tokens.weight, model.norm.weight, lm_head.weight.
|
||
warnings.warn(
|
||
/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaDecoderLayer because mixed precision turned on in FSDP. Affects: self_attn.q_proj.weight, self_attn.k_proj.weight, self_attn.v_proj.weight, self_attn.o_proj.weight, mlp.gate_proj.weight, mlp.up_proj.weight, mlp.down_proj.weight, input_layernorm.weight, post_attention_layernorm.weight.
|
||
warnings.warn(
|
||
/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1563: UserWarning: FSDP upcast of low precision parameters may affect the precision of model checkpoints.
|
||
warnings.warn(
|
||
[INFO|trainer.py:2414] 2026-04-10 17:35:33,058 >> ***** Running training *****
|
||
[INFO|trainer.py:2415] 2026-04-10 17:35:33,058 >> Num examples = 43,598
|
||
[INFO|trainer.py:2416] 2026-04-10 17:35:33,058 >> Num Epochs = 1
|
||
[INFO|trainer.py:2417] 2026-04-10 17:35:33,058 >> Instantaneous batch size per device = 16
|
||
[INFO|trainer.py:2420] 2026-04-10 17:35:33,058 >> Total train batch size (w. parallel, distributed & accumulation) = 128
|
||
[INFO|trainer.py:2421] 2026-04-10 17:35:33,058 >> Gradient Accumulation steps = 1
|
||
[INFO|trainer.py:2422] 2026-04-10 17:35:33,058 >> Total optimization steps = 340
|
||
[INFO|trainer.py:2423] 2026-04-10 17:35:33,058 >> Number of trainable parameters = 1,003,782,656
|
||
[INFO|integration_utils.py:831] 2026-04-10 17:35:33,059 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
|
||
wandb: Currently logged in as: can-not-fand (can-not-fand-northeastern-university). Use `wandb login --relogin` to force relogin
|
||
wandb: wandb version 0.25.1 is available! To upgrade, please run:
|
||
wandb: $ pip install wandb --upgrade
|
||
wandb: Tracking run with wandb version 0.17.5
|
||
wandb: Run data is saved locally in /scratch/feng.yulu/dynamic-dpo-v4/wandb/wandb/run-20260410_173535-wep2te2x
|
||
wandb: Run `wandb offline` to turn off syncing.
|
||
wandb: Syncing run llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009
|
||
wandb: ⭐️ View project at https://wandb.ai/can-not-fand-northeastern-university/huggingface
|
||
wandb: 🚀 View run at https://wandb.ai/can-not-fand-northeastern-university/huggingface/runs/wep2te2x
|
||
|
||
0%| | 0/340 [00:00<?, ?it/s][WARNING|modeling_utils.py:1713] 2026-04-10 17:35:41,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed
|
||
[WARNING|modeling_utils.py:1713] 2026-04-10 17:35:41,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed
|
||
[WARNING|modeling_utils.py:1713] 2026-04-10 17:35:41,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed
|
||
[WARNING|modeling_utils.py:1713] 2026-04-10 17:35:41,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed
|
||
[WARNING|modeling_utils.py:1713] 2026-04-10 17:35:41,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed
|
||
[WARNING|modeling_utils.py:1713] 2026-04-10 17:35:41,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed
|
||
[WARNING|modeling_utils.py:1713] 2026-04-10 17:35:41,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed
|
||
[WARNING|modeling_utils.py:1713] 2026-04-10 17:35:41,810 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed
|
||
|
||
0%| | 1/340 [00:03<18:07, 3.21s/it]
|
||
|
||
{'loss': 0.6938, 'grad_norm': 23.717201232910156, 'learning_rate': 0.0, 'margin_dpo/margin_mean': -0.0843656063079834, 'margin_dpo/margin_std': 0.20181308686733246, 'logps/chosen': -72.44038391113281, 'logps/rejected': -70.95858764648438, 'logps/ref_chosen': -72.42105865478516, 'logps/ref_rejected': -71.02362823486328, 'logits/chosen': -0.4739703834056854, 'logits/rejected': -0.44689586758613586, 'epoch': 0.0}
|
||
|
||
0%| | 1/340 [00:03<18:07, 3.21s/it]
|
||
1%| | 2/340 [00:06<16:53, 3.00s/it]
|
||
1%| | 3/340 [00:08<16:07, 2.87s/it]
|
||
1%| | 4/340 [00:11<15:08, 2.70s/it]
|
||
1%|▏ | 5/340 [00:13<14:57, 2.68s/it]
|
||
|
||
{'loss': 0.6943, 'grad_norm': 24.15522003173828, 'learning_rate': 5.88235294117647e-08, 'margin_dpo/margin_mean': -0.0912436842918396, 'margin_dpo/margin_std': 0.36911237239837646, 'logps/chosen': -76.55665588378906, 'logps/rejected': -71.69610595703125, 'logps/ref_chosen': -76.4837875366211, 'logps/ref_rejected': -71.7144775390625, 'logits/chosen': -0.5054930448532104, 'logits/rejected': -0.4999650716781616, 'epoch': 0.01}
|
||
|
||
1%|▏ | 5/340 [00:13<14:57, 2.68s/it]
|
||
2%|▏ | 6/340 [00:16<14:51, 2.67s/it]
|
||
2%|▏ | 7/340 [00:19<14:46, 2.66s/it]
|
||
2%|▏ | 8/340 [00:21<14:29, 2.62s/it]
|
||
3%|▎ | 9/340 [00:24<14:27, 2.62s/it]
|
||
3%|▎ | 10/340 [00:26<14:24, 2.62s/it]
|
||
|
||
{'loss': 0.6933, 'grad_norm': 23.068735122680664, 'learning_rate': 1.3235294117647057e-07, 'margin_dpo/margin_mean': 0.0031534195877611637, 'margin_dpo/margin_std': 0.3234597444534302, 'logps/chosen': -76.17481994628906, 'logps/rejected': -73.90404510498047, 'logps/ref_chosen': -76.15269470214844, 'logps/ref_rejected': -73.87877655029297, 'logits/chosen': -0.5124594569206238, 'logits/rejected': -0.49317699670791626, 'epoch': 0.03}
|
||
|
||
3%|▎ | 10/340 [00:26<14:24, 2.62s/it]
|
||
3%|▎ | 11/340 [00:29<14:27, 2.64s/it]
|
||
4%|▎ | 12/340 [00:32<14:26, 2.64s/it]
|
||
4%|▍ | 13/340 [00:34<14:25, 2.65s/it]
|
||
4%|▍ | 14/340 [00:37<14:16, 2.63s/it]
|
||
4%|▍ | 15/340 [00:40<14:10, 2.62s/it]
|
||
|
||
{'loss': 0.6898, 'grad_norm': 28.796030044555664, 'learning_rate': 2.0588235294117645e-07, 'margin_dpo/margin_mean': 0.09566803276538849, 'margin_dpo/margin_std': 0.3500857353210449, 'logps/chosen': -67.05145263671875, 'logps/rejected': -73.06277465820312, 'logps/ref_chosen': -67.0902099609375, 'logps/ref_rejected': -73.005859375, 'logits/chosen': -0.5413268208503723, 'logits/rejected': -0.5226410031318665, 'epoch': 0.04}
|
||
|
||
4%|▍ | 15/340 [00:40<14:10, 2.62s/it]
|
||
5%|▍ | 16/340 [00:42<14:14, 2.64s/it]
|
||
5%|▌ | 17/340 [00:45<14:03, 2.61s/it]
|
||
5%|▌ | 18/340 [00:47<13:58, 2.60s/it]
|
||
6%|▌ | 19/340 [00:50<13:48, 2.58s/it]
|
||
6%|▌ | 20/340 [00:53<13:52, 2.60s/it]
|
||
|
||
{'loss': 0.6824, 'grad_norm': 20.94307518005371, 'learning_rate': 2.7941176470588235e-07, 'margin_dpo/margin_mean': 0.19857604801654816, 'margin_dpo/margin_std': 0.378338098526001, 'logps/chosen': -73.87080383300781, 'logps/rejected': -80.62101745605469, 'logps/ref_chosen': -73.9133071899414, 'logps/ref_rejected': -80.46495056152344, 'logits/chosen': -0.5276651382446289, 'logits/rejected': -0.5001177787780762, 'epoch': 0.06}
|
||
|
||
6%|▌ | 20/340 [00:53<13:52, 2.60s/it]
|
||
6%|▌ | 21/340 [00:55<14:02, 2.64s/it]
|
||
6%|▋ | 22/340 [00:58<14:01, 2.65s/it]
|
||
7%|▋ | 23/340 [01:01<13:57, 2.64s/it]
|
||
7%|▋ | 24/340 [01:03<14:09, 2.69s/it]
|
||
7%|▋ | 25/340 [01:06<13:57, 2.66s/it]
|
||
|
||
{'loss': 0.6642, 'grad_norm': 24.610126495361328, 'learning_rate': 3.529411764705882e-07, 'margin_dpo/margin_mean': 0.44518008828163147, 'margin_dpo/margin_std': 0.6063351631164551, 'logps/chosen': -60.977256774902344, 'logps/rejected': -74.73905181884766, 'logps/ref_chosen': -61.014869689941406, 'logps/ref_rejected': -74.33148193359375, 'logits/chosen': -0.5061219930648804, 'logits/rejected': -0.5009726285934448, 'epoch': 0.07}
|
||
|
||
7%|▋ | 25/340 [01:06<13:57, 2.66s/it]
|
||
8%|▊ | 26/340 [01:09<13:46, 2.63s/it]
|
||
8%|▊ | 27/340 [01:11<13:27, 2.58s/it]
|
||
8%|▊ | 28/340 [01:14<13:25, 2.58s/it]
|
||
9%|▊ | 29/340 [01:16<13:26, 2.59s/it]
|
||
9%|▉ | 30/340 [01:19<13:29, 2.61s/it]
|
||
|
||
{'loss': 0.6294, 'grad_norm': 21.515533447265625, 'learning_rate': 4.264705882352941e-07, 'margin_dpo/margin_mean': 1.5730347633361816, 'margin_dpo/margin_std': 1.7553781270980835, 'logps/chosen': -78.83164978027344, 'logps/rejected': -83.10078430175781, 'logps/ref_chosen': -78.80770111083984, 'logps/ref_rejected': -81.50379943847656, 'logits/chosen': -0.5904145240783691, 'logits/rejected': -0.5685775279998779, 'epoch': 0.09}
|
||
|
||
9%|▉ | 30/340 [01:19<13:29, 2.61s/it]
|
||
9%|▉ | 31/340 [01:22<13:27, 2.61s/it]
|
||
9%|▉ | 32/340 [01:24<13:32, 2.64s/it]
|
||
10%|▉ | 33/340 [01:27<13:27, 2.63s/it]
|
||
10%|█ | 34/340 [01:29<13:11, 2.59s/it]
|
||
10%|█ | 35/340 [01:32<13:14, 2.60s/it]
|
||
|
||
{'loss': 0.6028, 'grad_norm': 19.351747512817383, 'learning_rate': 5e-07, 'margin_dpo/margin_mean': 2.158336877822876, 'margin_dpo/margin_std': 2.8764147758483887, 'logps/chosen': -86.93069458007812, 'logps/rejected': -88.55570220947266, 'logps/ref_chosen': -86.67269134521484, 'logps/ref_rejected': -86.13935852050781, 'logits/chosen': -0.5566071271896362, 'logits/rejected': -0.5428273677825928, 'epoch': 0.1}
|
||
|
||
10%|█ | 35/340 [01:32<13:14, 2.60s/it]
|
||
11%|█ | 36/340 [01:35<13:13, 2.61s/it]
|
||
11%|█ | 37/340 [01:37<13:09, 2.61s/it]
|
||
11%|█ | 38/340 [01:40<13:05, 2.60s/it]
|
||
11%|█▏ | 39/340 [01:42<12:59, 2.59s/it]
|
||
12%|█▏ | 40/340 [01:45<13:05, 2.62s/it]
|
||
|
||
{'loss': 0.5446, 'grad_norm': 18.829681396484375, 'learning_rate': 4.996706849759452e-07, 'margin_dpo/margin_mean': 4.941764831542969, 'margin_dpo/margin_std': 8.191742897033691, 'logps/chosen': -71.7585220336914, 'logps/rejected': -91.31529235839844, 'logps/ref_chosen': -69.31690216064453, 'logps/ref_rejected': -83.9319076538086, 'logits/chosen': -0.6493271589279175, 'logits/rejected': -0.6133594512939453, 'epoch': 0.12}
|
||
|
||
12%|█▏ | 40/340 [01:45<13:05, 2.62s/it]
|
||
12%|█▏ | 41/340 [01:48<13:08, 2.64s/it]
|
||
12%|█▏ | 42/340 [01:50<13:01, 2.62s/it]
|
||
13%|█▎ | 43/340 [01:53<12:56, 2.62s/it]
|
||
13%|█▎ | 44/340 [01:55<12:47, 2.59s/it]
|
||
13%|█▎ | 45/340 [01:58<12:39, 2.58s/it]
|
||
|
||
{'loss': 0.553, 'grad_norm': 23.498613357543945, 'learning_rate': 4.986836074908615e-07, 'margin_dpo/margin_mean': 5.294968128204346, 'margin_dpo/margin_std': 6.769883632659912, 'logps/chosen': -73.5013427734375, 'logps/rejected': -108.92988586425781, 'logps/ref_chosen': -69.97550964355469, 'logps/ref_rejected': -100.10908508300781, 'logits/chosen': -0.6821354627609253, 'logits/rejected': -0.6494560837745667, 'epoch': 0.13}
|
||
|
||
13%|█▎ | 45/340 [01:58<12:39, 2.58s/it]
|
||
14%|█▎ | 46/340 [02:01<12:46, 2.61s/it]
|
||
14%|█▍ | 47/340 [02:03<13:03, 2.67s/it]
|
||
14%|█▍ | 48/340 [02:06<12:55, 2.66s/it]
|
||
14%|█▍ | 49/340 [02:09<12:45, 2.63s/it]
|
||
15%|█▍ | 50/340 [02:11<12:30, 2.59s/it]
|
||
|
||
{'loss': 0.5518, 'grad_norm': 30.29952621459961, 'learning_rate': 4.970413680203148e-07, 'margin_dpo/margin_mean': 4.282275199890137, 'margin_dpo/margin_std': 7.439302921295166, 'logps/chosen': -78.32559967041016, 'logps/rejected': -95.23252868652344, 'logps/ref_chosen': -72.90187072753906, 'logps/ref_rejected': -85.52653503417969, 'logits/chosen': -0.6595835089683533, 'logits/rejected': -0.6233135461807251, 'epoch': 0.15}
|
||
|
||
15%|█▍ | 50/340 [02:11<12:30, 2.59s/it]
|
||
15%|█▌ | 51/340 [02:14<12:30, 2.60s/it]
|
||
15%|█▌ | 52/340 [02:16<12:15, 2.55s/it]
|
||
16%|█▌ | 53/340 [02:19<12:12, 2.55s/it]
|
||
16%|█▌ | 54/340 [02:21<12:21, 2.59s/it]
|
||
16%|█▌ | 55/340 [02:24<12:17, 2.59s/it]
|
||
|
||
{'loss': 0.5112, 'grad_norm': 23.780656814575195, 'learning_rate': 4.947482930773511e-07, 'margin_dpo/margin_mean': 7.125207424163818, 'margin_dpo/margin_std': 9.734245300292969, 'logps/chosen': -91.6336898803711, 'logps/rejected': -109.0378646850586, 'logps/ref_chosen': -87.45826721191406, 'logps/ref_rejected': -97.73722076416016, 'logits/chosen': -0.7151781916618347, 'logits/rejected': -0.6897321939468384, 'epoch': 0.16}
|
||
|
||
16%|█▌ | 55/340 [02:24<12:17, 2.59s/it]
|
||
16%|█▋ | 56/340 [02:27<12:20, 2.61s/it]
|
||
17%|█▋ | 57/340 [02:29<12:23, 2.63s/it]
|
||
17%|█▋ | 58/340 [02:32<12:20, 2.63s/it]
|
||
17%|█▋ | 59/340 [02:35<12:15, 2.62s/it]
|
||
18%|█▊ | 60/340 [02:37<12:20, 2.64s/it]
|
||
|
||
{'loss': 0.5286, 'grad_norm': 20.72915267944336, 'learning_rate': 4.918104238142103e-07, 'margin_dpo/margin_mean': 6.065438747406006, 'margin_dpo/margin_std': 10.341069221496582, 'logps/chosen': -110.2301254272461, 'logps/rejected': -99.53703308105469, 'logps/ref_chosen': -106.60343933105469, 'logps/ref_rejected': -89.84490203857422, 'logits/chosen': -0.6631725430488586, 'logits/rejected': -0.6214786767959595, 'epoch': 0.18}
|
||
|
||
18%|█▊ | 60/340 [02:37<12:20, 2.64s/it]
|
||
18%|█▊ | 61/340 [02:40<12:03, 2.59s/it]
|
||
18%|█▊ | 62/340 [02:42<12:06, 2.61s/it]
|
||
19%|█▊ | 63/340 [02:45<12:05, 2.62s/it]
|
||
19%|█▉ | 64/340 [02:48<12:03, 2.62s/it]
|
||
19%|█▉ | 65/340 [02:50<11:53, 2.59s/it]
|
||
|
||
{'loss': 0.4746, 'grad_norm': 16.05661392211914, 'learning_rate': 4.882355001067891e-07, 'margin_dpo/margin_mean': 5.947785377502441, 'margin_dpo/margin_std': 7.2523908615112305, 'logps/chosen': -79.79920959472656, 'logps/rejected': -93.5802001953125, 'logps/ref_chosen': -76.7091064453125, 'logps/ref_rejected': -84.54231262207031, 'logits/chosen': -0.6507592797279358, 'logits/rejected': -0.6253207921981812, 'epoch': 0.19}
|
||
|
||
19%|█▉ | 65/340 [02:50<11:53, 2.59s/it]
|
||
19%|█▉ | 66/340 [02:53<11:43, 2.57s/it]
|
||
20%|█▉ | 67/340 [02:55<11:41, 2.57s/it]
|
||
20%|██ | 68/340 [02:58<11:40, 2.57s/it]
|
||
20%|██ | 69/340 [03:00<11:27, 2.54s/it]
|
||
21%|██ | 70/340 [03:03<11:36, 2.58s/it]
|
||
|
||
{'loss': 0.4662, 'grad_norm': 16.453359603881836, 'learning_rate': 4.840329401637809e-07, 'margin_dpo/margin_mean': 8.28502082824707, 'margin_dpo/margin_std': 8.248537063598633, 'logps/chosen': -74.00252532958984, 'logps/rejected': -103.95845031738281, 'logps/ref_chosen': -70.0877914428711, 'logps/ref_rejected': -91.75868225097656, 'logits/chosen': -0.698811411857605, 'logits/rejected': -0.6621960401535034, 'epoch': 0.21}
|
||
|
||
21%|██ | 70/340 [03:03<11:36, 2.58s/it]
|
||
21%|██ | 71/340 [03:06<11:36, 2.59s/it]
|
||
21%|██ | 72/340 [03:08<11:49, 2.65s/it]
|
||
21%|██▏ | 73/340 [03:11<11:42, 2.63s/it]
|
||
22%|██▏ | 74/340 [03:14<11:34, 2.61s/it]
|
||
22%|██▏ | 75/340 [03:16<11:29, 2.60s/it]
|
||
|
||
{'loss': 0.4863, 'grad_norm': 17.00535011291504, 'learning_rate': 4.792138157142157e-07, 'margin_dpo/margin_mean': 8.173115730285645, 'margin_dpo/margin_std': 8.817681312561035, 'logps/chosen': -78.68012237548828, 'logps/rejected': -97.5809555053711, 'logps/ref_chosen': -74.91792297363281, 'logps/ref_rejected': -85.64566802978516, 'logits/chosen': -0.6827956438064575, 'logits/rejected': -0.6566829681396484, 'epoch': 0.22}
|
||
|
||
22%|██▏ | 75/340 [03:16<11:29, 2.60s/it]
|
||
22%|██▏ | 76/340 [03:19<11:28, 2.61s/it]
|
||
23%|██▎ | 77/340 [03:21<11:27, 2.62s/it]
|
||
23%|██▎ | 78/340 [03:24<11:23, 2.61s/it]
|
||
23%|██▎ | 79/340 [03:27<11:19, 2.60s/it]
|
||
24%|██▎ | 80/340 [03:29<11:12, 2.59s/it]
|
||
|
||
{'loss': 0.451, 'grad_norm': 21.13958168029785, 'learning_rate': 4.737908228387656e-07, 'margin_dpo/margin_mean': 7.951646327972412, 'margin_dpo/margin_std': 8.248537063598633, 'logps/chosen': -102.5855941772461, 'logps/rejected': -105.6670150756836, 'logps/ref_chosen': -97.75636291503906, 'logps/ref_rejected': -92.88613891601562, 'logits/chosen': -0.7372442483901978, 'logits/rejected': -0.689995288848877, 'epoch': 0.24}
|
||
|
||
24%|██▎ | 80/340 [03:29<11:12, 2.59s/it]
|
||
24%|██▍ | 81/340 [03:32<11:13, 2.60s/it]
|
||
24%|██▍ | 82/340 [03:34<11:00, 2.56s/it]
|
||
24%|██▍ | 83/340 [03:37<10:48, 2.53s/it]
|
||
25%|██▍ | 84/340 [03:39<10:53, 2.55s/it]
|
||
25%|██▌ | 85/340 [03:42<10:59, 2.59s/it]
|
||
|
||
{'loss': 0.4569, 'grad_norm': 18.165218353271484, 'learning_rate': 4.6777824852166437e-07, 'margin_dpo/margin_mean': 7.221736907958984, 'margin_dpo/margin_std': 8.439001083374023, 'logps/chosen': -85.70280456542969, 'logps/rejected': -101.9955825805664, 'logps/ref_chosen': -78.9326171875, 'logps/ref_rejected': -88.00363159179688, 'logits/chosen': -0.6671745777130127, 'logits/rejected': -0.6385531425476074, 'epoch': 0.25}
|
||
|
||
25%|██▌ | 85/340 [03:42<10:59, 2.59s/it]
|
||
25%|██▌ | 86/340 [03:45<10:57, 2.59s/it]
|
||
26%|██▌ | 87/340 [03:47<10:51, 2.57s/it]
|
||
26%|██▌ | 88/340 [03:50<10:45, 2.56s/it]
|
||
26%|██▌ | 89/340 [03:52<10:47, 2.58s/it]
|
||
26%|██▋ | 90/340 [03:55<10:44, 2.58s/it]
|
||
|
||
{'loss': 0.4419, 'grad_norm': 20.739215850830078, 'learning_rate': 4.611919330113591e-07, 'margin_dpo/margin_mean': 9.419827461242676, 'margin_dpo/margin_std': 9.238184928894043, 'logps/chosen': -84.86643981933594, 'logps/rejected': -105.78071594238281, 'logps/ref_chosen': -78.78388214111328, 'logps/ref_rejected': -90.2783203125, 'logits/chosen': -0.6510001420974731, 'logits/rejected': -0.629525899887085, 'epoch': 0.26}
|
||
|
||
26%|██▋ | 90/340 [03:55<10:44, 2.58s/it]
|
||
27%|██▋ | 91/340 [03:57<10:44, 2.59s/it]
|
||
27%|██▋ | 92/340 [04:00<10:53, 2.63s/it]
|
||
27%|██▋ | 93/340 [04:03<10:48, 2.62s/it]
|
||
28%|██▊ | 94/340 [04:05<10:40, 2.60s/it]
|
||
28%|██▊ | 95/340 [04:08<10:39, 2.61s/it]
|
||
|
||
{'loss': 0.4514, 'grad_norm': 17.511486053466797, 'learning_rate': 4.5404922808905543e-07, 'margin_dpo/margin_mean': 7.360299587249756, 'margin_dpo/margin_std': 11.319549560546875, 'logps/chosen': -74.32402038574219, 'logps/rejected': -78.22425842285156, 'logps/ref_chosen': -65.91403198242188, 'logps/ref_rejected': -62.45396041870117, 'logits/chosen': -0.6517031788825989, 'logits/rejected': -0.6104840040206909, 'epoch': 0.28}
|
||
|
||
28%|██▊ | 95/340 [04:08<10:39, 2.61s/it]
|
||
28%|██▊ | 96/340 [04:11<10:49, 2.66s/it]
|
||
29%|██▊ | 97/340 [04:13<10:33, 2.61s/it]
|
||
29%|██▉ | 98/340 [04:16<10:33, 2.62s/it]
|
||
29%|██▉ | 99/340 [04:18<10:26, 2.60s/it]
|
||
29%|██▉ | 100/340 [04:21<10:27, 2.61s/it]
|
||
|
||
{'loss': 0.4265, 'grad_norm': 18.769145965576172, 'learning_rate': 4.4636895135509966e-07, 'margin_dpo/margin_mean': 9.642545700073242, 'margin_dpo/margin_std': 11.237717628479004, 'logps/chosen': -84.81422424316406, 'logps/rejected': -110.46153259277344, 'logps/ref_chosen': -77.24075317382812, 'logps/ref_rejected': -93.24552917480469, 'logits/chosen': -0.6338332295417786, 'logits/rejected': -0.6123248338699341, 'epoch': 0.29}
|
||
|
||
29%|██▉ | 100/340 [04:21<10:27, 2.61s/it][INFO|trainer.py:4307] 2026-04-10 17:40:00,228 >>
|
||
***** Running Evaluation *****
|
||
[INFO|trainer.py:4309] 2026-04-10 17:40:00,228 >> Num examples = 2339
|
||
[INFO|trainer.py:4312] 2026-04-10 17:40:00,228 >> Batch size = 16
|
||
|
||
|
||
0%| | 0/18 [00:00<?, ?it/s][A
|
||
|
||
11%|█ | 2/18 [00:01<00:08, 1.89it/s][A
|
||
|
||
17%|█▋ | 3/18 [00:02<00:11, 1.32it/s][A
|
||
|
||
22%|██▏ | 4/18 [00:03<00:12, 1.13it/s][A
|
||
|
||
28%|██▊ | 5/18 [00:04<00:13, 1.00s/it][A
|
||
|
||
33%|███▎ | 6/18 [00:05<00:12, 1.05s/it][A
|
||
|
||
39%|███▉ | 7/18 [00:06<00:11, 1.08s/it][A
|
||
|
||
44%|████▍ | 8/18 [00:07<00:10, 1.08s/it][A
|
||
|
||
50%|█████ | 9/18 [00:08<00:09, 1.09s/it][A
|
||
|
||
56%|█████▌ | 10/18 [00:10<00:09, 1.13s/it][A
|
||
|
||
61%|██████ | 11/18 [00:11<00:07, 1.09s/it][A
|
||
|
||
67%|██████▋ | 12/18 [00:12<00:06, 1.15s/it][A
|
||
|
||
72%|███████▏ | 13/18 [00:13<00:05, 1.11s/it][A
|
||
|
||
78%|███████▊ | 14/18 [00:14<00:04, 1.16s/it][A
|
||
|
||
83%|████████▎ | 15/18 [00:15<00:03, 1.16s/it][A
|
||
|
||
89%|████████▉ | 16/18 [00:16<00:02, 1.10s/it][A
|
||
|
||
94%|█████████▍| 17/18 [00:18<00:01, 1.13s/it][A
|
||
|
||
100%|██████████| 18/18 [00:18<00:00, 1.07s/it][A
|
||
|
||
|
||
|
||
[A{'eval_loss': 0.5427329540252686, 'eval_runtime': 20.4041, 'eval_samples_per_second': 114.634, 'eval_steps_per_second': 0.931, 'eval_margin_dpo/margin_mean': 5.341787338256836, 'eval_margin_dpo/margin_std': 10.061349868774414, 'eval_logps/chosen': -107.19888305664062, 'eval_logps/rejected': -95.6607437133789, 'eval_logps/ref_chosen': -97.0617446899414, 'eval_logps/ref_rejected': -80.18183135986328, 'eval_logits/chosen': -0.6361338496208191, 'eval_logits/rejected': -0.6085699200630188, 'epoch': 0.29}
|
||
|
||
29%|██▉ | 100/340 [04:41<10:27, 2.61s/it]
|
||
|
||
100%|██████████| 18/18 [00:19<00:00, 1.07s/it][A
|
||
|
||
[A
|
||
30%|██▉ | 101/340 [04:44<34:47, 8.74s/it]
|
||
30%|███ | 102/340 [04:47<27:22, 6.90s/it]
|
||
30%|███ | 103/340 [04:49<22:02, 5.58s/it]
|
||
31%|███ | 104/340 [04:52<18:25, 4.68s/it]
|
||
31%|███ | 105/340 [04:54<15:47, 4.03s/it]
|
||
|
||
{'loss': 0.427, 'grad_norm': 17.255924224853516, 'learning_rate': 4.381713366536311e-07, 'margin_dpo/margin_mean': 12.32937240600586, 'margin_dpo/margin_std': 11.30049991607666, 'logps/chosen': -76.29129791259766, 'logps/rejected': -92.5668716430664, 'logps/ref_chosen': -70.76807403564453, 'logps/ref_rejected': -74.71427917480469, 'logits/chosen': -0.6774856448173523, 'logits/rejected': -0.6355584263801575, 'epoch': 0.31}
|
||
|
||
31%|███ | 105/340 [04:54<15:47, 4.03s/it]
|
||
31%|███ | 106/340 [04:57<13:57, 3.58s/it]
|
||
31%|███▏ | 107/340 [04:59<12:44, 3.28s/it]
|
||
32%|███▏ | 108/340 [05:02<11:53, 3.08s/it]
|
||
32%|███▏ | 109/340 [05:05<11:20, 2.94s/it]
|
||
32%|███▏ | 110/340 [05:07<10:52, 2.84s/it]
|
||
|
||
{'loss': 0.4145, 'grad_norm': 18.394851684570312, 'learning_rate': 4.2947798076611047e-07, 'margin_dpo/margin_mean': 11.69237995147705, 'margin_dpo/margin_std': 12.493224143981934, 'logps/chosen': -89.50286102294922, 'logps/rejected': -109.15755462646484, 'logps/ref_chosen': -81.14533996582031, 'logps/ref_rejected': -89.10765838623047, 'logits/chosen': -0.6861704587936401, 'logits/rejected': -0.6574342846870422, 'epoch': 0.32}
|
||
|
||
32%|███▏ | 110/340 [05:07<10:52, 2.84s/it]
|
||
33%|███▎ | 111/340 [05:10<10:28, 2.75s/it]
|
||
33%|███▎ | 112/340 [05:12<09:44, 2.56s/it]
|
||
33%|███▎ | 113/340 [05:14<09:36, 2.54s/it]
|
||
34%|███▎ | 114/340 [05:17<09:35, 2.55s/it]
|
||
34%|███▍ | 115/340 [05:20<09:35, 2.56s/it]
|
||
|
||
{'loss': 0.4011, 'grad_norm': 18.24220848083496, 'learning_rate': 4.203117865141635e-07, 'margin_dpo/margin_mean': 11.827718734741211, 'margin_dpo/margin_std': 12.786788940429688, 'logps/chosen': -76.1394271850586, 'logps/rejected': -122.98934173583984, 'logps/ref_chosen': -64.77717590332031, 'logps/ref_rejected': -99.79936218261719, 'logits/chosen': -0.6698350310325623, 'logits/rejected': -0.6532580256462097, 'epoch': 0.34}
|
||
|
||
34%|███▍ | 115/340 [05:20<09:35, 2.56s/it]
|
||
34%|███▍ | 116/340 [05:22<09:45, 2.61s/it]
|
||
34%|███▍ | 117/340 [05:25<09:42, 2.61s/it]
|
||
35%|███▍ | 118/340 [05:27<09:38, 2.60s/it]
|
||
35%|███▌ | 119/340 [05:30<09:45, 2.65s/it]
|
||
35%|███▌ | 120/340 [05:33<09:38, 2.63s/it]
|
||
|
||
{'loss': 0.4154, 'grad_norm': 22.541603088378906, 'learning_rate': 4.106969024216348e-07, 'margin_dpo/margin_mean': 13.501307487487793, 'margin_dpo/margin_std': 12.104052543640137, 'logps/chosen': -86.44108581542969, 'logps/rejected': -104.9658203125, 'logps/ref_chosen': -77.35191345214844, 'logps/ref_rejected': -82.3753433227539, 'logits/chosen': -0.6674679517745972, 'logits/rejected': -0.6289718151092529, 'epoch': 0.35}
|
||
|
||
35%|███▌ | 120/340 [05:33<09:38, 2.63s/it]
|
||
36%|███▌ | 121/340 [05:35<09:34, 2.62s/it]
|
||
36%|███▌ | 122/340 [05:38<09:29, 2.61s/it]
|
||
36%|███▌ | 123/340 [05:41<09:30, 2.63s/it]
|
||
36%|███▋ | 124/340 [05:43<09:19, 2.59s/it]
|
||
37%|███▋ | 125/340 [05:45<08:59, 2.51s/it]
|
||
|
||
{'loss': 0.3871, 'grad_norm': 18.34996223449707, 'learning_rate': 4.006586590948141e-07, 'margin_dpo/margin_mean': 12.104842185974121, 'margin_dpo/margin_std': 12.706830978393555, 'logps/chosen': -84.34068298339844, 'logps/rejected': -109.58891296386719, 'logps/ref_chosen': -74.56766510009766, 'logps/ref_rejected': -87.71104431152344, 'logits/chosen': -0.6953171491622925, 'logits/rejected': -0.6653636693954468, 'epoch': 0.37}
|
||
|
||
37%|███▋ | 125/340 [05:45<08:59, 2.51s/it]
|
||
37%|███▋ | 126/340 [05:48<08:53, 2.49s/it]
|
||
37%|███▋ | 127/340 [05:51<08:57, 2.52s/it]
|
||
38%|███▊ | 128/340 [05:53<08:59, 2.54s/it]
|
||
38%|███▊ | 129/340 [05:56<08:59, 2.56s/it]
|
||
38%|███▊ | 130/340 [05:58<09:00, 2.57s/it]
|
||
|
||
{'loss': 0.4128, 'grad_norm': 19.588794708251953, 'learning_rate': 3.9022350248844246e-07, 'margin_dpo/margin_mean': 13.186391830444336, 'margin_dpo/margin_std': 16.62637710571289, 'logps/chosen': -92.37910461425781, 'logps/rejected': -118.1786117553711, 'logps/ref_chosen': -79.86932373046875, 'logps/ref_rejected': -92.48243713378906, 'logits/chosen': -0.6832663416862488, 'logits/rejected': -0.6475099325180054, 'epoch': 0.38}
|
||
|
||
38%|███▊ | 130/340 [05:58<09:00, 2.57s/it]
|
||
39%|███▊ | 131/340 [06:01<08:43, 2.51s/it]
|
||
39%|███▉ | 132/340 [06:03<08:50, 2.55s/it]
|
||
39%|███▉ | 133/340 [06:06<08:41, 2.52s/it]
|
||
39%|███▉ | 134/340 [06:08<08:39, 2.52s/it]
|
||
40%|███▉ | 135/340 [06:11<08:41, 2.54s/it]
|
||
|
||
{'loss': 0.3855, 'grad_norm': 19.383163452148438, 'learning_rate': 3.794189242333106e-07, 'margin_dpo/margin_mean': 12.800195693969727, 'margin_dpo/margin_std': 15.272809028625488, 'logps/chosen': -93.94104766845703, 'logps/rejected': -115.92558288574219, 'logps/ref_chosen': -82.55046081542969, 'logps/ref_rejected': -91.73478698730469, 'logits/chosen': -0.6862474679946899, 'logits/rejected': -0.6508306264877319, 'epoch': 0.4}
|
||
|
||
40%|███▉ | 135/340 [06:11<08:41, 2.54s/it]
|
||
40%|████ | 136/340 [06:13<08:39, 2.55s/it]
|
||
40%|████ | 137/340 [06:16<08:27, 2.50s/it]
|
||
41%|████ | 138/340 [06:18<08:27, 2.51s/it]
|
||
41%|████ | 139/340 [06:21<08:23, 2.50s/it]
|
||
41%|████ | 140/340 [06:24<08:33, 2.57s/it]
|
||
|
||
{'loss': 0.3782, 'grad_norm': 15.547196388244629, 'learning_rate': 3.6827338920900253e-07, 'margin_dpo/margin_mean': 12.726763725280762, 'margin_dpo/margin_std': 12.062446594238281, 'logps/chosen': -86.93388366699219, 'logps/rejected': -111.50956726074219, 'logps/ref_chosen': -76.40785217285156, 'logps/ref_rejected': -88.25675964355469, 'logits/chosen': -0.6315192580223083, 'logits/rejected': -0.5951318740844727, 'epoch': 0.41}
|
||
|
||
41%|████ | 140/340 [06:24<08:33, 2.57s/it]
|
||
41%|████▏ | 141/340 [06:26<08:33, 2.58s/it]
|
||
42%|████▏ | 142/340 [06:29<08:36, 2.61s/it]
|
||
42%|████▏ | 143/340 [06:31<08:32, 2.60s/it]
|
||
42%|████▏ | 144/340 [06:34<08:18, 2.54s/it]
|
||
43%|████▎ | 145/340 [06:37<08:23, 2.58s/it]
|
||
|
||
{'loss': 0.3722, 'grad_norm': 16.376129150390625, 'learning_rate': 3.568162605525952e-07, 'margin_dpo/margin_mean': 14.328218460083008, 'margin_dpo/margin_std': 13.251609802246094, 'logps/chosen': -90.28238677978516, 'logps/rejected': -105.7525863647461, 'logps/ref_chosen': -79.43595123291016, 'logps/ref_rejected': -80.57792663574219, 'logits/chosen': -0.694092869758606, 'logits/rejected': -0.6596013307571411, 'epoch': 0.43}
|
||
|
||
43%|████▎ | 145/340 [06:37<08:23, 2.58s/it]
|
||
43%|████▎ | 146/340 [06:39<08:23, 2.59s/it]
|
||
43%|████▎ | 147/340 [06:42<08:24, 2.61s/it]
|
||
44%|████▎ | 148/340 [06:44<08:18, 2.60s/it]
|
||
44%|████▍ | 149/340 [06:47<08:13, 2.59s/it]
|
||
44%|████▍ | 150/340 [06:50<08:13, 2.60s/it]
|
||
|
||
{'loss': 0.4063, 'grad_norm': 19.48674201965332, 'learning_rate': 3.4507772230088147e-07, 'margin_dpo/margin_mean': 9.931692123413086, 'margin_dpo/margin_std': 11.23712158203125, 'logps/chosen': -82.92797088623047, 'logps/rejected': -99.82804870605469, 'logps/ref_chosen': -69.55223846435547, 'logps/ref_rejected': -76.5206298828125, 'logits/chosen': -0.6205201745033264, 'logits/rejected': -0.5989262461662292, 'epoch': 0.44}
|
||
|
||
44%|████▍ | 150/340 [06:50<08:13, 2.60s/it]
|
||
44%|████▍ | 151/340 [06:52<08:08, 2.59s/it]
|
||
45%|████▍ | 152/340 [06:55<08:06, 2.59s/it]
|
||
45%|████▌ | 153/340 [06:57<08:02, 2.58s/it]
|
||
45%|████▌ | 154/340 [07:00<08:03, 2.60s/it]
|
||
46%|████▌ | 155/340 [07:02<07:58, 2.59s/it]
|
||
|
||
{'loss': 0.3818, 'grad_norm': 18.904706954956055, 'learning_rate': 3.3308869986991487e-07, 'margin_dpo/margin_mean': 12.618766784667969, 'margin_dpo/margin_std': 14.547628402709961, 'logps/chosen': -98.33650207519531, 'logps/rejected': -106.65342712402344, 'logps/ref_chosen': -83.78580474853516, 'logps/ref_rejected': -79.48396301269531, 'logits/chosen': -0.6716780662536621, 'logits/rejected': -0.6312578320503235, 'epoch': 0.46}
|
||
|
||
46%|████▌ | 155/340 [07:02<07:58, 2.59s/it]
|
||
46%|████▌ | 156/340 [07:05<07:46, 2.53s/it]
|
||
46%|████▌ | 157/340 [07:07<07:44, 2.54s/it]
|
||
46%|████▋ | 158/340 [07:10<07:40, 2.53s/it]
|
||
47%|████▋ | 159/340 [07:12<07:37, 2.53s/it]
|
||
47%|████▋ | 160/340 [07:15<07:39, 2.55s/it]
|
||
|
||
{'loss': 0.3508, 'grad_norm': 16.047494888305664, 'learning_rate': 3.208807785813777e-07, 'margin_dpo/margin_mean': 13.903097152709961, 'margin_dpo/margin_std': 10.593317031860352, 'logps/chosen': -85.62313079833984, 'logps/rejected': -123.3752212524414, 'logps/ref_chosen': -71.89569091796875, 'logps/ref_rejected': -95.74468231201172, 'logits/chosen': -0.6216621994972229, 'logits/rejected': -0.5977298617362976, 'epoch': 0.47}
|
||
|
||
47%|████▋ | 160/340 [07:15<07:39, 2.55s/it]
|
||
47%|████▋ | 161/340 [07:18<07:37, 2.56s/it]
|
||
48%|████▊ | 162/340 [07:20<07:40, 2.59s/it]
|
||
48%|████▊ | 163/340 [07:23<07:41, 2.61s/it]
|
||
48%|████▊ | 164/340 [07:26<07:39, 2.61s/it]
|
||
49%|████▊ | 165/340 [07:28<07:37, 2.61s/it]
|
||
|
||
{'loss': 0.3544, 'grad_norm': 17.789417266845703, 'learning_rate': 3.084861204504122e-07, 'margin_dpo/margin_mean': 17.097713470458984, 'margin_dpo/margin_std': 14.805742263793945, 'logps/chosen': -91.7447738647461, 'logps/rejected': -120.28157806396484, 'logps/ref_chosen': -77.03978729248047, 'logps/ref_rejected': -88.47887420654297, 'logits/chosen': -0.6328192949295044, 'logits/rejected': -0.5899003148078918, 'epoch': 0.49}
|
||
|
||
49%|████▊ | 165/340 [07:28<07:37, 2.61s/it]
|
||
49%|████▉ | 166/340 [07:31<07:22, 2.55s/it]
|
||
49%|████▉ | 167/340 [07:33<07:12, 2.50s/it]
|
||
49%|████▉ | 168/340 [07:36<07:13, 2.52s/it]
|
||
50%|████▉ | 169/340 [07:38<07:05, 2.49s/it]
|
||
50%|█████ | 170/340 [07:40<06:58, 2.46s/it]
|
||
|
||
{'loss': 0.3454, 'grad_norm': 19.674264907836914, 'learning_rate': 2.959373794541426e-07, 'margin_dpo/margin_mean': 14.613912582397461, 'margin_dpo/margin_std': 12.491094589233398, 'logps/chosen': -88.34684753417969, 'logps/rejected': -119.37635803222656, 'logps/ref_chosen': -71.93138122558594, 'logps/ref_rejected': -88.34697723388672, 'logits/chosen': -0.6691595911979675, 'logits/rejected': -0.6374617218971252, 'epoch': 0.5}
|
||
|
||
50%|█████ | 170/340 [07:40<06:58, 2.46s/it]
|
||
50%|█████ | 171/340 [07:43<07:15, 2.58s/it]
|
||
51%|█████ | 172/340 [07:46<07:13, 2.58s/it]
|
||
51%|█████ | 173/340 [07:48<07:10, 2.58s/it]
|
||
51%|█████ | 174/340 [07:51<06:51, 2.48s/it]
|
||
51%|█████▏ | 175/340 [07:53<06:52, 2.50s/it]
|
||
|
||
{'loss': 0.3713, 'grad_norm': 20.303539276123047, 'learning_rate': 2.8326761550411346e-07, 'margin_dpo/margin_mean': 12.219950675964355, 'margin_dpo/margin_std': 13.678237915039062, 'logps/chosen': -86.78947448730469, 'logps/rejected': -123.58447265625, 'logps/ref_chosen': -68.0127182006836, 'logps/ref_rejected': -92.58775329589844, 'logits/chosen': -0.6473700404167175, 'logits/rejected': -0.6196728944778442, 'epoch': 0.51}
|
||
|
||
51%|█████▏ | 175/340 [07:53<06:52, 2.50s/it]
|
||
52%|█████▏ | 176/340 [07:56<06:55, 2.53s/it]
|
||
52%|█████▏ | 177/340 [07:58<06:56, 2.55s/it]
|
||
52%|█████▏ | 178/340 [08:01<06:56, 2.57s/it]
|
||
53%|█████▎ | 179/340 [08:04<06:55, 2.58s/it]
|
||
53%|█████▎ | 180/340 [08:06<06:49, 2.56s/it]
|
||
|
||
{'loss': 0.3585, 'grad_norm': 17.388011932373047, 'learning_rate': 2.7051020734928443e-07, 'margin_dpo/margin_mean': 17.15180778503418, 'margin_dpo/margin_std': 14.575396537780762, 'logps/chosen': -80.26910400390625, 'logps/rejected': -122.92547607421875, 'logps/ref_chosen': -61.942466735839844, 'logps/ref_rejected': -87.44703674316406, 'logits/chosen': -0.611466646194458, 'logits/rejected': -0.587906002998352, 'epoch': 0.53}
|
||
|
||
53%|█████▎ | 180/340 [08:06<06:49, 2.56s/it]
|
||
53%|█████▎ | 181/340 [08:09<06:50, 2.58s/it]
|
||
54%|█████▎ | 182/340 [08:11<06:47, 2.58s/it]
|
||
54%|█████▍ | 183/340 [08:14<06:50, 2.61s/it]
|
||
54%|█████▍ | 184/340 [08:17<06:55, 2.66s/it]
|
||
54%|█████▍ | 185/340 [08:19<06:50, 2.65s/it]
|
||
|
||
{'loss': 0.3501, 'grad_norm': 19.291353225708008, 'learning_rate': 2.5769876463904263e-07, 'margin_dpo/margin_mean': 14.570175170898438, 'margin_dpo/margin_std': 14.043818473815918, 'logps/chosen': -87.93196105957031, 'logps/rejected': -99.19012451171875, 'logps/ref_chosen': -72.35160064697266, 'logps/ref_rejected': -69.03958129882812, 'logits/chosen': -0.6199885606765747, 'logits/rejected': -0.5656689405441284, 'epoch': 0.54}
|
||
|
||
54%|█████▍ | 185/340 [08:19<06:50, 2.65s/it]
|
||
55%|█████▍ | 186/340 [08:22<06:46, 2.64s/it]
|
||
55%|█████▌ | 187/340 [08:25<06:43, 2.64s/it]
|
||
55%|█████▌ | 188/340 [08:27<06:38, 2.62s/it]
|
||
56%|█████▌ | 189/340 [08:30<06:26, 2.56s/it]
|
||
56%|█████▌ | 190/340 [08:32<06:26, 2.58s/it]
|
||
|
||
{'loss': 0.3605, 'grad_norm': 18.855066299438477, 'learning_rate': 2.4486703937790243e-07, 'margin_dpo/margin_mean': 14.287073135375977, 'margin_dpo/margin_std': 14.959236145019531, 'logps/chosen': -100.40862274169922, 'logps/rejected': -106.55586242675781, 'logps/ref_chosen': -79.45222473144531, 'logps/ref_rejected': -71.31239318847656, 'logits/chosen': -0.6612949967384338, 'logits/rejected': -0.6132839322090149, 'epoch': 0.56}
|
||
|
||
56%|█████▌ | 190/340 [08:32<06:26, 2.58s/it]
|
||
56%|█████▌ | 191/340 [08:35<06:25, 2.59s/it]
|
||
56%|█████▋ | 192/340 [08:37<06:16, 2.54s/it]
|
||
57%|█████▋ | 193/340 [08:40<06:15, 2.56s/it]
|
||
57%|█████▋ | 194/340 [08:43<06:16, 2.58s/it]
|
||
57%|█████▋ | 195/340 [08:45<06:15, 2.59s/it]
|
||
|
||
{'loss': 0.3429, 'grad_norm': 20.077083587646484, 'learning_rate': 2.320488370051681e-07, 'margin_dpo/margin_mean': 18.4035587310791, 'margin_dpo/margin_std': 15.252446174621582, 'logps/chosen': -89.4631118774414, 'logps/rejected': -121.50825500488281, 'logps/ref_chosen': -71.20511627197266, 'logps/ref_rejected': -84.8467025756836, 'logits/chosen': -0.667130172252655, 'logits/rejected': -0.6179927587509155, 'epoch': 0.57}
|
||
|
||
57%|█████▋ | 195/340 [08:45<06:15, 2.59s/it]
|
||
58%|█████▊ | 196/340 [08:48<06:08, 2.56s/it]
|
||
58%|█████▊ | 197/340 [08:50<06:06, 2.56s/it]
|
||
58%|█████▊ | 198/340 [08:53<05:58, 2.52s/it]
|
||
59%|█████▊ | 199/340 [08:55<05:57, 2.54s/it]
|
||
59%|█████▉ | 200/340 [08:58<05:50, 2.50s/it]
|
||
|
||
{'loss': 0.3411, 'grad_norm': 17.699968338012695, 'learning_rate': 2.192779273338215e-07, 'margin_dpo/margin_mean': 16.283123016357422, 'margin_dpo/margin_std': 15.363842964172363, 'logps/chosen': -89.16279602050781, 'logps/rejected': -104.97953796386719, 'logps/ref_chosen': -71.31782531738281, 'logps/ref_rejected': -70.8514404296875, 'logits/chosen': -0.6087943911552429, 'logits/rejected': -0.5693117380142212, 'epoch': 0.59}
|
||
|
||
59%|█████▉ | 200/340 [08:58<05:50, 2.50s/it][INFO|trainer.py:4307] 2026-04-10 17:44:36,788 >>
|
||
***** Running Evaluation *****
|
||
[INFO|trainer.py:4309] 2026-04-10 17:44:36,788 >> Num examples = 2339
|
||
[INFO|trainer.py:4312] 2026-04-10 17:44:36,788 >> Batch size = 16
|
||
|
||
|
||
0%| | 0/18 [00:00<?, ?it/s][A
|
||
|
||
11%|█ | 2/18 [00:01<00:08, 1.90it/s][A
|
||
|
||
17%|█▋ | 3/18 [00:02<00:11, 1.32it/s][A
|
||
|
||
22%|██▏ | 4/18 [00:03<00:12, 1.13it/s][A
|
||
|
||
28%|██▊ | 5/18 [00:04<00:13, 1.01s/it][A
|
||
|
||
33%|███▎ | 6/18 [00:05<00:12, 1.04s/it][A
|
||
|
||
39%|███▉ | 7/18 [00:06<00:11, 1.08s/it][A
|
||
|
||
44%|████▍ | 8/18 [00:07<00:10, 1.07s/it][A
|
||
|
||
50%|█████ | 9/18 [00:08<00:09, 1.08s/it][A
|
||
|
||
56%|█████▌ | 10/18 [00:10<00:08, 1.12s/it][A
|
||
|
||
61%|██████ | 11/18 [00:11<00:07, 1.09s/it][A
|
||
|
||
67%|██████▋ | 12/18 [00:12<00:06, 1.14s/it][A
|
||
|
||
72%|███████▏ | 13/18 [00:13<00:05, 1.11s/it][A
|
||
|
||
78%|███████▊ | 14/18 [00:14<00:04, 1.15s/it][A
|
||
|
||
83%|████████▎ | 15/18 [00:15<00:03, 1.15s/it][A
|
||
|
||
89%|████████▉ | 16/18 [00:16<00:02, 1.10s/it][A
|
||
|
||
94%|█████████▍| 17/18 [00:17<00:01, 1.12s/it][A
|
||
|
||
100%|██████████| 18/18 [00:18<00:00, 1.06s/it][A
|
||
|
||
|
||
|
||
[A{'eval_loss': 0.4754122495651245, 'eval_runtime': 20.3073, 'eval_samples_per_second': 115.18, 'eval_steps_per_second': 0.936, 'eval_margin_dpo/margin_mean': 10.299551010131836, 'eval_margin_dpo/margin_std': 14.652626991271973, 'eval_logps/chosen': -119.31637573242188, 'eval_logps/rejected': -112.73600769042969, 'eval_logps/ref_chosen': -97.0617446899414, 'eval_logps/ref_rejected': -80.18183135986328, 'eval_logits/chosen': -0.602095365524292, 'eval_logits/rejected': -0.5640405416488647, 'epoch': 0.59}
|
||
|
||
59%|█████▉ | 200/340 [09:18<05:50, 2.50s/it]
|
||
|
||
100%|██████████| 18/18 [00:19<00:00, 1.06s/it][A
|
||
|
||
[A[INFO|trainer.py:3984] 2026-04-10 17:45:12,597 >> Saving model checkpoint to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-200
|
||
[INFO|configuration_utils.py:419] 2026-04-10 17:45:12,605 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-200/config.json
|
||
[INFO|configuration_utils.py:911] 2026-04-10 17:45:12,610 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-200/generation_config.json
|
||
[INFO|modeling_utils.py:3580] 2026-04-10 17:45:56,024 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 6 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-200/model.safetensors.index.json.
|
||
[INFO|tokenization_utils_base.py:2510] 2026-04-10 17:45:56,031 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-200/tokenizer_config.json
|
||
[INFO|tokenization_utils_base.py:2519] 2026-04-10 17:45:56,034 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-200/special_tokens_map.json
|
||
|
||
59%|█████▉ | 201/340 [13:21<3:07:16, 80.84s/it]
|
||
59%|█████▉ | 202/340 [13:24<2:11:48, 57.31s/it]
|
||
60%|█████▉ | 203/340 [13:26<1:33:12, 40.82s/it]
|
||
60%|██████ | 204/340 [13:29<1:06:30, 29.34s/it]
|
||
60%|██████ | 205/340 [13:31<47:58, 21.32s/it]
|
||
|
||
{'loss': 0.3792, 'grad_norm': 19.535417556762695, 'learning_rate': 2.065879555832674e-07, 'margin_dpo/margin_mean': 13.387273788452148, 'margin_dpo/margin_std': 14.807754516601562, 'logps/chosen': -104.2248764038086, 'logps/rejected': -104.95343017578125, 'logps/ref_chosen': -84.44103240966797, 'logps/ref_rejected': -71.78230285644531, 'logits/chosen': -0.5760528445243835, 'logits/rejected': -0.5279114842414856, 'epoch': 0.6}
|
||
|
||
60%|██████ | 205/340 [13:31<47:58, 21.32s/it]
|
||
61%|██████ | 206/340 [13:33<34:50, 15.60s/it]
|
||
61%|██████ | 207/340 [13:36<25:57, 11.71s/it]
|
||
61%|██████ | 208/340 [13:39<19:46, 8.99s/it]
|
||
61%|██████▏ | 209/340 [13:41<15:25, 7.06s/it]
|
||
62%|██████▏ | 210/340 [13:44<12:19, 5.69s/it]
|
||
|
||
{'loss': 0.3251, 'grad_norm': 17.17575454711914, 'learning_rate': 1.9401235374032425e-07, 'margin_dpo/margin_mean': 14.702362060546875, 'margin_dpo/margin_std': 16.377933502197266, 'logps/chosen': -101.36656188964844, 'logps/rejected': -108.5728988647461, 'logps/ref_chosen': -83.94493103027344, 'logps/ref_rejected': -76.44892120361328, 'logits/chosen': -0.6245664358139038, 'logits/rejected': -0.5699684619903564, 'epoch': 0.62}
|
||
|
||
62%|██████▏ | 210/340 [13:44<12:19, 5.69s/it]
|
||
62%|██████▏ | 211/340 [13:46<10:16, 4.78s/it]
|
||
62%|██████▏ | 212/340 [13:49<08:48, 4.13s/it]
|
||
63%|██████▎ | 213/340 [13:52<07:47, 3.68s/it]
|
||
63%|██████▎ | 214/340 [13:54<07:03, 3.36s/it]
|
||
63%|██████▎ | 215/340 [13:57<06:33, 3.15s/it]
|
||
|
||
{'loss': 0.3633, 'grad_norm': 20.044084548950195, 'learning_rate': 1.8158425248197928e-07, 'margin_dpo/margin_mean': 16.278963088989258, 'margin_dpo/margin_std': 19.206457138061523, 'logps/chosen': -102.8707275390625, 'logps/rejected': -122.053955078125, 'logps/ref_chosen': -82.23881530761719, 'logps/ref_rejected': -85.1430892944336, 'logits/chosen': -0.5605936050415039, 'logits/rejected': -0.5190353393554688, 'epoch': 0.63}
|
||
|
||
63%|██████▎ | 215/340 [13:57<06:33, 3.15s/it]
|
||
64%|██████▎ | 216/340 [14:00<06:11, 2.99s/it]
|
||
64%|██████▍ | 217/340 [14:02<05:50, 2.85s/it]
|
||
64%|██████▍ | 218/340 [14:05<05:45, 2.83s/it]
|
||
64%|██████▍ | 219/340 [14:07<05:34, 2.77s/it]
|
||
65%|██████▍ | 220/340 [14:10<05:24, 2.71s/it]
|
||
|
||
{'loss': 0.3587, 'grad_norm': 21.036956787109375, 'learning_rate': 1.6933639389195134e-07, 'margin_dpo/margin_mean': 11.612079620361328, 'margin_dpo/margin_std': 14.565820693969727, 'logps/chosen': -97.38944244384766, 'logps/rejected': -117.23432922363281, 'logps/ref_chosen': -76.5594482421875, 'logps/ref_rejected': -84.79225158691406, 'logits/chosen': -0.621160626411438, 'logits/rejected': -0.585429310798645, 'epoch': 0.65}
|
||
|
||
65%|██████▍ | 220/340 [14:10<05:24, 2.71s/it]
|
||
65%|██████▌ | 221/340 [14:13<05:21, 2.70s/it]
|
||
65%|██████▌ | 222/340 [14:15<05:17, 2.69s/it]
|
||
66%|██████▌ | 223/340 [14:18<05:11, 2.66s/it]
|
||
66%|██████▌ | 224/340 [14:20<05:01, 2.60s/it]
|
||
66%|██████▌ | 225/340 [14:23<04:55, 2.57s/it]
|
||
|
||
{'loss': 0.3385, 'grad_norm': 21.023571014404297, 'learning_rate': 1.573010452010098e-07, 'margin_dpo/margin_mean': 18.626880645751953, 'margin_dpo/margin_std': 18.950374603271484, 'logps/chosen': -87.20682525634766, 'logps/rejected': -132.78231811523438, 'logps/ref_chosen': -68.70957946777344, 'logps/ref_rejected': -95.65819549560547, 'logits/chosen': -0.6097210049629211, 'logits/rejected': -0.6041680574417114, 'epoch': 0.66}
|
||
|
||
66%|██████▌ | 225/340 [14:23<04:55, 2.57s/it]
|
||
66%|██████▋ | 226/340 [14:26<04:54, 2.58s/it]
|
||
67%|██████▋ | 227/340 [14:28<04:50, 2.57s/it]
|
||
67%|██████▋ | 228/340 [14:31<04:49, 2.58s/it]
|
||
67%|██████▋ | 229/340 [14:33<04:48, 2.60s/it]
|
||
68%|██████▊ | 230/340 [14:36<04:46, 2.61s/it]
|
||
|
||
{'loss': 0.3269, 'grad_norm': 19.34729766845703, 'learning_rate': 1.4550991377830423e-07, 'margin_dpo/margin_mean': 14.579324722290039, 'margin_dpo/margin_std': 14.860456466674805, 'logps/chosen': -92.71955871582031, 'logps/rejected': -129.41712951660156, 'logps/ref_chosen': -76.04148864746094, 'logps/ref_rejected': -98.15973663330078, 'logits/chosen': -0.6367233395576477, 'logits/rejected': -0.5984948873519897, 'epoch': 0.68}
|
||
|
||
68%|██████▊ | 230/340 [14:36<04:46, 2.61s/it]
|
||
68%|██████▊ | 231/340 [14:39<04:43, 2.60s/it]
|
||
68%|██████▊ | 232/340 [14:41<04:38, 2.58s/it]
|
||
69%|██████▊ | 233/340 [14:44<04:36, 2.58s/it]
|
||
69%|██████▉ | 234/340 [14:46<04:35, 2.59s/it]
|
||
69%|██████▉ | 235/340 [14:49<04:34, 2.61s/it]
|
||
|
||
{'loss': 0.3347, 'grad_norm': 18.263099670410156, 'learning_rate': 1.339940635976592e-07, 'margin_dpo/margin_mean': 19.314985275268555, 'margin_dpo/margin_std': 15.413273811340332, 'logps/chosen': -88.53390502929688, 'logps/rejected': -127.80912780761719, 'logps/ref_chosen': -70.64253997802734, 'logps/ref_rejected': -90.60277557373047, 'logits/chosen': -0.6155376434326172, 'logits/rejected': -0.5955866575241089, 'epoch': 0.69}
|
||
|
||
69%|██████▉ | 235/340 [14:49<04:34, 2.61s/it]
|
||
69%|██████▉ | 236/340 [14:52<04:33, 2.63s/it]
|
||
70%|██████▉ | 237/340 [14:54<04:31, 2.63s/it]
|
||
70%|███████ | 238/340 [14:57<04:27, 2.62s/it]
|
||
70%|███████ | 239/340 [14:59<04:24, 2.62s/it]
|
||
71%|███████ | 240/340 [15:02<04:17, 2.57s/it]
|
||
|
||
{'loss': 0.3433, 'grad_norm': 21.18890380859375, 'learning_rate': 1.227838333989088e-07, 'margin_dpo/margin_mean': 17.56354331970215, 'margin_dpo/margin_std': 16.671550750732422, 'logps/chosen': -94.69210052490234, 'logps/rejected': -106.57359313964844, 'logps/ref_chosen': -75.90282440185547, 'logps/ref_rejected': -70.22077178955078, 'logits/chosen': -0.5532498955726624, 'logits/rejected': -0.5167180299758911, 'epoch': 0.71}
|
||
|
||
71%|███████ | 240/340 [15:02<04:17, 2.57s/it]
|
||
71%|███████ | 241/340 [15:05<04:15, 2.58s/it]
|
||
71%|███████ | 242/340 [15:07<04:07, 2.53s/it]
|
||
71%|███████▏ | 243/340 [15:10<04:08, 2.57s/it]
|
||
72%|███████▏ | 244/340 [15:12<04:06, 2.56s/it]
|
||
72%|███████▏ | 245/340 [15:15<04:05, 2.59s/it]
|
||
|
||
{'loss': 0.3073, 'grad_norm': 19.42283058166504, 'learning_rate': 1.1190875675987355e-07, 'margin_dpo/margin_mean': 21.223926544189453, 'margin_dpo/margin_std': 16.53793716430664, 'logps/chosen': -87.87870788574219, 'logps/rejected': -142.7686767578125, 'logps/ref_chosen': -68.88108825683594, 'logps/ref_rejected': -102.547119140625, 'logits/chosen': -0.5711519122123718, 'logits/rejected': -0.5506427884101868, 'epoch': 0.72}
|
||
|
||
72%|███████▏ | 245/340 [15:15<04:05, 2.59s/it]
|
||
72%|███████▏ | 246/340 [15:17<04:03, 2.59s/it]
|
||
73%|███████▎ | 247/340 [15:20<04:03, 2.62s/it]
|
||
73%|███████▎ | 248/340 [15:23<03:59, 2.60s/it]
|
||
73%|███████▎ | 249/340 [15:25<03:56, 2.60s/it]
|
||
74%|███████▎ | 250/340 [15:28<03:52, 2.59s/it]
|
||
|
||
{'loss': 0.4138, 'grad_norm': 21.975610733032227, 'learning_rate': 1.0139748428955333e-07, 'margin_dpo/margin_mean': 16.201473236083984, 'margin_dpo/margin_std': 15.055798530578613, 'logps/chosen': -104.53717041015625, 'logps/rejected': -118.47982025146484, 'logps/ref_chosen': -88.11860656738281, 'logps/ref_rejected': -85.85978698730469, 'logits/chosen': -0.63815838098526, 'logits/rejected': -0.5797184705734253, 'epoch': 0.74}
|
||
|
||
74%|███████▎ | 250/340 [15:28<03:52, 2.59s/it]
|
||
74%|███████▍ | 251/340 [15:30<03:50, 2.59s/it]
|
||
74%|███████▍ | 252/340 [15:33<03:49, 2.61s/it]
|
||
74%|███████▍ | 253/340 [15:36<03:45, 2.59s/it]
|
||
75%|███████▍ | 254/340 [15:38<03:49, 2.67s/it]
|
||
75%|███████▌ | 255/340 [15:41<03:46, 2.66s/it]
|
||
|
||
{'loss': 0.3314, 'grad_norm': 21.86973762512207, 'learning_rate': 9.127770814751932e-08, 'margin_dpo/margin_mean': 16.87302017211914, 'margin_dpo/margin_std': 16.191524505615234, 'logps/chosen': -113.81512451171875, 'logps/rejected': -123.86918640136719, 'logps/ref_chosen': -93.02457427978516, 'logps/ref_rejected': -86.20562744140625, 'logits/chosen': -0.5965814590454102, 'logits/rejected': -0.5407648682594299, 'epoch': 0.75}
|
||
|
||
75%|███████▌ | 255/340 [15:41<03:46, 2.66s/it]
|
||
75%|███████▌ | 256/340 [15:44<03:43, 2.66s/it]
|
||
76%|███████▌ | 257/340 [15:46<03:42, 2.69s/it]
|
||
76%|███████▌ | 258/340 [15:49<03:39, 2.68s/it]
|
||
76%|███████▌ | 259/340 [15:52<03:34, 2.65s/it]
|
||
76%|███████▋ | 260/340 [15:54<03:32, 2.65s/it]
|
||
|
||
{'loss': 0.3414, 'grad_norm': 20.748577117919922, 'learning_rate': 8.15760890883607e-08, 'margin_dpo/margin_mean': 20.42922592163086, 'margin_dpo/margin_std': 16.98196029663086, 'logps/chosen': -98.30900573730469, 'logps/rejected': -133.5509796142578, 'logps/ref_chosen': -79.27108001708984, 'logps/ref_rejected': -94.08381652832031, 'logits/chosen': -0.5860427618026733, 'logits/rejected': -0.5433794856071472, 'epoch': 0.76}
|
||
|
||
76%|███████▋ | 260/340 [15:54<03:32, 2.65s/it]
|
||
77%|███████▋ | 261/340 [15:57<03:26, 2.62s/it]
|
||
77%|███████▋ | 262/340 [16:00<03:25, 2.63s/it]
|
||
77%|███████▋ | 263/340 [16:02<03:21, 2.62s/it]
|
||
78%|███████▊ | 264/340 [16:05<03:19, 2.63s/it]
|
||
78%|███████▊ | 265/340 [16:07<03:16, 2.62s/it]
|
||
|
||
{'loss': 0.3493, 'grad_norm': 20.377286911010742, 'learning_rate': 7.231818622338822e-08, 'margin_dpo/margin_mean': 15.021594047546387, 'margin_dpo/margin_std': 12.837465286254883, 'logps/chosen': -99.11347198486328, 'logps/rejected': -126.92435455322266, 'logps/ref_chosen': -79.24869537353516, 'logps/ref_rejected': -92.03797912597656, 'logits/chosen': -0.5678300857543945, 'logits/rejected': -0.5425071120262146, 'epoch': 0.78}
|
||
|
||
78%|███████▊ | 265/340 [16:07<03:16, 2.62s/it]
|
||
78%|███████▊ | 266/340 [16:10<03:15, 2.64s/it]
|
||
79%|███████▊ | 267/340 [16:13<03:11, 2.62s/it]
|
||
79%|███████▉ | 268/340 [16:15<03:10, 2.65s/it]
|
||
79%|███████▉ | 269/340 [16:18<03:05, 2.61s/it]
|
||
79%|███████▉ | 270/340 [16:21<03:03, 2.63s/it]
|
||
|
||
{'loss': 0.332, 'grad_norm': 17.822444915771484, 'learning_rate': 6.352838968463919e-08, 'margin_dpo/margin_mean': 16.91426658630371, 'margin_dpo/margin_std': 14.53496265411377, 'logps/chosen': -97.48078918457031, 'logps/rejected': -116.37190246582031, 'logps/ref_chosen': -80.15914154052734, 'logps/ref_rejected': -82.13599395751953, 'logits/chosen': -0.606745719909668, 'logits/rejected': -0.5473134517669678, 'epoch': 0.79}
|
||
|
||
79%|███████▉ | 270/340 [16:21<03:03, 2.63s/it]
|
||
80%|███████▉ | 271/340 [16:23<03:00, 2.62s/it]
|
||
80%|████████ | 272/340 [16:26<02:57, 2.60s/it]
|
||
80%|████████ | 273/340 [16:28<02:54, 2.60s/it]
|
||
81%|████████ | 274/340 [16:31<02:51, 2.61s/it]
|
||
81%|████████ | 275/340 [16:34<02:49, 2.61s/it]
|
||
|
||
{'loss': 0.3348, 'grad_norm': 20.570648193359375, 'learning_rate': 5.5229856368582376e-08, 'margin_dpo/margin_mean': 16.90357780456543, 'margin_dpo/margin_std': 20.21615219116211, 'logps/chosen': -99.41848754882812, 'logps/rejected': -122.4229965209961, 'logps/ref_chosen': -78.87225341796875, 'logps/ref_rejected': -84.97318267822266, 'logits/chosen': -0.6010477542877197, 'logits/rejected': -0.5661951899528503, 'epoch': 0.81}
|
||
|
||
81%|████████ | 275/340 [16:34<02:49, 2.61s/it]
|
||
81%|████████ | 276/340 [16:36<02:43, 2.55s/it]
|
||
81%|████████▏ | 277/340 [16:39<02:41, 2.57s/it]
|
||
82%|████████▏ | 278/340 [16:41<02:39, 2.57s/it]
|
||
82%|████████▏ | 279/340 [16:44<02:35, 2.55s/it]
|
||
82%|████████▏ | 280/340 [16:46<02:35, 2.59s/it]
|
||
|
||
{'loss': 0.3329, 'grad_norm': 18.737754821777344, 'learning_rate': 4.7444448928806615e-08, 'margin_dpo/margin_mean': 20.195457458496094, 'margin_dpo/margin_std': 19.39859390258789, 'logps/chosen': -117.15876770019531, 'logps/rejected': -154.00479125976562, 'logps/ref_chosen': -96.47113800048828, 'logps/ref_rejected': -113.1217041015625, 'logits/chosen': -0.5662145018577576, 'logits/rejected': -0.525722324848175, 'epoch': 0.82}
|
||
|
||
82%|████████▏ | 280/340 [16:46<02:35, 2.59s/it]
|
||
83%|████████▎ | 281/340 [16:49<02:35, 2.64s/it]
|
||
83%|████████▎ | 282/340 [16:52<02:32, 2.63s/it]
|
||
83%|████████▎ | 283/340 [16:54<02:30, 2.64s/it]
|
||
84%|████████▎ | 284/340 [16:57<02:26, 2.61s/it]
|
||
84%|████████▍ | 285/340 [16:59<02:22, 2.58s/it]
|
||
|
||
{'loss': 0.3382, 'grad_norm': 21.463726043701172, 'learning_rate': 4.019267817841834e-08, 'margin_dpo/margin_mean': 17.379127502441406, 'margin_dpo/margin_std': 17.829914093017578, 'logps/chosen': -111.90663146972656, 'logps/rejected': -114.01655578613281, 'logps/ref_chosen': -91.53522491455078, 'logps/ref_rejected': -76.2660140991211, 'logits/chosen': -0.630197286605835, 'logits/rejected': -0.5674210786819458, 'epoch': 0.84}
|
||
|
||
84%|████████▍ | 285/340 [16:59<02:22, 2.58s/it]
|
||
84%|████████▍ | 286/340 [17:02<02:19, 2.58s/it]
|
||
84%|████████▍ | 287/340 [17:05<02:16, 2.57s/it]
|
||
85%|████████▍ | 288/340 [17:07<02:16, 2.63s/it]
|
||
85%|████████▌ | 289/340 [17:10<02:13, 2.61s/it]
|
||
85%|████████▌ | 290/340 [17:13<02:11, 2.63s/it]
|
||
|
||
{'loss': 0.3409, 'grad_norm': 18.62375831604004, 'learning_rate': 3.349364905389032e-08, 'margin_dpo/margin_mean': 18.841894149780273, 'margin_dpo/margin_std': 18.295745849609375, 'logps/chosen': -98.92496490478516, 'logps/rejected': -117.43675231933594, 'logps/ref_chosen': -78.96186828613281, 'logps/ref_rejected': -78.63177490234375, 'logits/chosen': -0.5863774418830872, 'logits/rejected': -0.5456980466842651, 'epoch': 0.85}
|
||
|
||
85%|████████▌ | 290/340 [17:13<02:11, 2.63s/it]
|
||
86%|████████▌ | 291/340 [17:15<02:08, 2.62s/it]
|
||
86%|████████▌ | 292/340 [17:18<02:05, 2.61s/it]
|
||
86%|████████▌ | 293/340 [17:20<02:02, 2.61s/it]
|
||
86%|████████▋ | 294/340 [17:23<01:58, 2.57s/it]
|
||
87%|████████▋ | 295/340 [17:25<01:55, 2.57s/it]
|
||
|
||
{'loss': 0.3351, 'grad_norm': 16.586910247802734, 'learning_rate': 2.736501028272095e-08, 'margin_dpo/margin_mean': 15.721613883972168, 'margin_dpo/margin_std': 16.5610294342041, 'logps/chosen': -85.10719299316406, 'logps/rejected': -135.39389038085938, 'logps/ref_chosen': -64.14302825927734, 'logps/ref_rejected': -98.70811462402344, 'logits/chosen': -0.5259509086608887, 'logits/rejected': -0.5359938144683838, 'epoch': 0.87}
|
||
|
||
87%|████████▋ | 295/340 [17:25<01:55, 2.57s/it]
|
||
87%|████████▋ | 296/340 [17:28<01:53, 2.57s/it]
|
||
87%|████████▋ | 297/340 [17:31<01:50, 2.58s/it]
|
||
88%|████████▊ | 298/340 [17:33<01:48, 2.59s/it]
|
||
88%|████████▊ | 299/340 [17:36<01:45, 2.57s/it]
|
||
88%|████████▊ | 300/340 [17:38<01:43, 2.59s/it]
|
||
|
||
{'loss': 0.3552, 'grad_norm': 19.39561653137207, 'learning_rate': 2.1822907887504932e-08, 'margin_dpo/margin_mean': 18.2686824798584, 'margin_dpo/margin_std': 16.341278076171875, 'logps/chosen': -80.19596099853516, 'logps/rejected': -130.80763244628906, 'logps/ref_chosen': -59.2784423828125, 'logps/ref_rejected': -91.62141418457031, 'logits/chosen': -0.5196036696434021, 'logits/rejected': -0.5250274538993835, 'epoch': 0.88}
|
||
|
||
88%|████████▊ | 300/340 [17:38<01:43, 2.59s/it][INFO|trainer.py:4307] 2026-04-10 17:53:17,548 >>
|
||
***** Running Evaluation *****
|
||
[INFO|trainer.py:4309] 2026-04-10 17:53:17,548 >> Num examples = 2339
|
||
[INFO|trainer.py:4312] 2026-04-10 17:53:17,548 >> Batch size = 16
|
||
|
||
|
||
0%| | 0/18 [00:00<?, ?it/s][A
|
||
|
||
11%|█ | 2/18 [00:01<00:08, 1.90it/s][A
|
||
|
||
17%|█▋ | 3/18 [00:02<00:11, 1.32it/s][A
|
||
|
||
22%|██▏ | 4/18 [00:03<00:12, 1.13it/s][A
|
||
|
||
28%|██▊ | 5/18 [00:04<00:13, 1.00s/it][A
|
||
|
||
33%|███▎ | 6/18 [00:05<00:12, 1.04s/it][A
|
||
|
||
39%|███▉ | 7/18 [00:06<00:11, 1.07s/it][A
|
||
|
||
44%|████▍ | 8/18 [00:07<00:10, 1.07s/it][A
|
||
|
||
50%|█████ | 9/18 [00:08<00:09, 1.08s/it][A
|
||
|
||
56%|█████▌ | 10/18 [00:10<00:08, 1.12s/it][A
|
||
|
||
61%|██████ | 11/18 [00:11<00:07, 1.09s/it][A
|
||
|
||
67%|██████▋ | 12/18 [00:12<00:06, 1.15s/it][A
|
||
|
||
72%|███████▏ | 13/18 [00:13<00:05, 1.11s/it][A
|
||
|
||
78%|███████▊ | 14/18 [00:14<00:04, 1.15s/it][A
|
||
|
||
83%|████████▎ | 15/18 [00:15<00:03, 1.15s/it][A
|
||
|
||
89%|████████▉ | 16/18 [00:16<00:02, 1.10s/it][A
|
||
|
||
94%|█████████▍| 17/18 [00:17<00:01, 1.13s/it][A
|
||
|
||
100%|██████████| 18/18 [00:18<00:00, 1.06s/it][A
|
||
|
||
|
||
|
||
[A{'eval_loss': 0.4588142931461334, 'eval_runtime': 20.3107, 'eval_samples_per_second': 115.161, 'eval_steps_per_second': 0.935, 'eval_margin_dpo/margin_mean': 11.118718147277832, 'eval_margin_dpo/margin_std': 15.069600105285645, 'eval_logps/chosen': -119.7147216796875, 'eval_logps/rejected': -113.95352935791016, 'eval_logps/ref_chosen': -97.0617446899414, 'eval_logps/ref_rejected': -80.18183135986328, 'eval_logits/chosen': -0.5876314640045166, 'eval_logits/rejected': -0.5494834184646606, 'epoch': 0.88}
|
||
|
||
88%|████████▊ | 300/340 [17:59<01:43, 2.59s/it]
|
||
|
||
100%|██████████| 18/18 [00:19<00:00, 1.06s/it][A
|
||
|
||
[A
|
||
89%|████████▊ | 301/340 [18:01<05:39, 8.70s/it]
|
||
89%|████████▉ | 302/340 [18:04<04:20, 6.86s/it]
|
||
89%|████████▉ | 303/340 [18:06<03:26, 5.57s/it]
|
||
89%|████████▉ | 304/340 [18:09<02:47, 4.66s/it]
|
||
90%|████████▉ | 305/340 [18:11<02:18, 3.95s/it]
|
||
|
||
{'loss': 0.3241, 'grad_norm': 20.72559356689453, 'learning_rate': 1.6881942648911074e-08, 'margin_dpo/margin_mean': 20.912582397460938, 'margin_dpo/margin_std': 15.790578842163086, 'logps/chosen': -110.14324951171875, 'logps/rejected': -134.03268432617188, 'logps/ref_chosen': -90.05252838134766, 'logps/ref_rejected': -93.02938842773438, 'logits/chosen': -0.6059945821762085, 'logits/rejected': -0.5594589710235596, 'epoch': 0.9}
|
||
|
||
90%|████████▉ | 305/340 [18:11<02:18, 3.95s/it]
|
||
90%|█████████ | 306/340 [18:14<01:59, 3.52s/it]
|
||
90%|█████████ | 307/340 [18:16<01:46, 3.24s/it]
|
||
91%|█████████ | 308/340 [18:19<01:38, 3.07s/it]
|
||
91%|█████████ | 309/340 [18:22<01:31, 2.94s/it]
|
||
91%|█████████ | 310/340 [18:24<01:24, 2.82s/it]
|
||
|
||
{'loss': 0.3336, 'grad_norm': 18.592208862304688, 'learning_rate': 1.2555131639630567e-08, 'margin_dpo/margin_mean': 17.357715606689453, 'margin_dpo/margin_std': 17.407108306884766, 'logps/chosen': -99.32337951660156, 'logps/rejected': -121.98432922363281, 'logps/ref_chosen': -76.26285552978516, 'logps/ref_rejected': -81.56607055664062, 'logits/chosen': -0.5199320316314697, 'logits/rejected': -0.48348456621170044, 'epoch': 0.91}
|
||
|
||
91%|█████████ | 310/340 [18:24<01:24, 2.82s/it]
|
||
91%|█████████▏| 311/340 [18:27<01:19, 2.75s/it]
|
||
92%|█████████▏| 312/340 [18:30<01:16, 2.73s/it]
|
||
92%|█████████▏| 313/340 [18:32<01:12, 2.69s/it]
|
||
92%|█████████▏| 314/340 [18:35<01:09, 2.67s/it]
|
||
93%|█████████▎| 315/340 [18:37<01:06, 2.65s/it]
|
||
|
||
{'loss': 0.3444, 'grad_norm': 19.586881637573242, 'learning_rate': 8.85387393063622e-09, 'margin_dpo/margin_mean': 17.211929321289062, 'margin_dpo/margin_std': 18.306108474731445, 'logps/chosen': -108.92083740234375, 'logps/rejected': -129.36099243164062, 'logps/ref_chosen': -89.47105407714844, 'logps/ref_rejected': -92.69927215576172, 'logits/chosen': -0.5956140160560608, 'logits/rejected': -0.5609453916549683, 'epoch': 0.93}
|
||
|
||
93%|█████████▎| 315/340 [18:37<01:06, 2.65s/it]
|
||
93%|█████████▎| 316/340 [18:40<01:02, 2.60s/it]
|
||
93%|█████████▎| 317/340 [18:42<00:59, 2.60s/it]
|
||
94%|█████████▎| 318/340 [18:45<00:57, 2.60s/it]
|
||
94%|█████████▍| 319/340 [18:48<00:54, 2.61s/it]
|
||
94%|█████████▍| 320/340 [18:50<00:51, 2.60s/it]
|
||
|
||
{'loss': 0.3732, 'grad_norm': 21.697298049926758, 'learning_rate': 5.7879205600998296e-09, 'margin_dpo/margin_mean': 15.386384963989258, 'margin_dpo/margin_std': 15.031097412109375, 'logps/chosen': -98.2002182006836, 'logps/rejected': -102.35930633544922, 'logps/ref_chosen': -76.45301818847656, 'logps/ref_rejected': -65.2257308959961, 'logits/chosen': -0.6173444986343384, 'logits/rejected': -0.5614223480224609, 'epoch': 0.94}
|
||
|
||
94%|█████████▍| 320/340 [18:50<00:51, 2.60s/it]
|
||
94%|█████████▍| 321/340 [18:53<00:49, 2.60s/it]
|
||
95%|█████████▍| 322/340 [18:55<00:46, 2.60s/it]
|
||
95%|█████████▌| 323/340 [18:58<00:44, 2.61s/it]
|
||
95%|█████████▌| 324/340 [19:01<00:41, 2.61s/it]
|
||
96%|█████████▌| 325/340 [19:03<00:39, 2.60s/it]
|
||
|
||
{'loss': 0.3374, 'grad_norm': 22.468570709228516, 'learning_rate': 3.3653488440851253e-09, 'margin_dpo/margin_mean': 15.664227485656738, 'margin_dpo/margin_std': 12.905950546264648, 'logps/chosen': -89.80387878417969, 'logps/rejected': -102.19793701171875, 'logps/ref_chosen': -71.98212432861328, 'logps/ref_rejected': -68.71195983886719, 'logits/chosen': -0.5936331152915955, 'logits/rejected': -0.5392800569534302, 'epoch': 0.96}
|
||
|
||
96%|█████████▌| 325/340 [19:03<00:39, 2.60s/it]
|
||
96%|█████████▌| 326/340 [19:06<00:36, 2.58s/it]
|
||
96%|█████████▌| 327/340 [19:08<00:32, 2.54s/it]
|
||
96%|█████████▋| 328/340 [19:11<00:30, 2.55s/it]
|
||
97%|█████████▋| 329/340 [19:13<00:27, 2.54s/it]
|
||
97%|█████████▋| 330/340 [19:16<00:25, 2.56s/it]
|
||
|
||
{'loss': 0.3212, 'grad_norm': 18.491226196289062, 'learning_rate': 1.592541096695571e-09, 'margin_dpo/margin_mean': 21.361230850219727, 'margin_dpo/margin_std': 19.999116897583008, 'logps/chosen': -95.72080993652344, 'logps/rejected': -132.95394897460938, 'logps/ref_chosen': -77.13968658447266, 'logps/ref_rejected': -93.0115737915039, 'logits/chosen': -0.5897213816642761, 'logits/rejected': -0.5493496656417847, 'epoch': 0.97}
|
||
|
||
97%|█████████▋| 330/340 [19:16<00:25, 2.56s/it]
|
||
97%|█████████▋| 331/340 [19:19<00:23, 2.57s/it]
|
||
98%|█████████▊| 332/340 [19:21<00:21, 2.63s/it]
|
||
98%|█████████▊| 333/340 [19:24<00:18, 2.62s/it]
|
||
98%|█████████▊| 334/340 [19:27<00:15, 2.66s/it]
|
||
99%|█████████▊| 335/340 [19:29<00:13, 2.64s/it]
|
||
|
||
{'loss': 0.3225, 'grad_norm': 17.843168258666992, 'learning_rate': 4.741678157389739e-10, 'margin_dpo/margin_mean': 13.225196838378906, 'margin_dpo/margin_std': 12.341458320617676, 'logps/chosen': -97.75109100341797, 'logps/rejected': -106.00955963134766, 'logps/ref_chosen': -78.12508392333984, 'logps/ref_rejected': -73.1583480834961, 'logits/chosen': -0.5449101328849792, 'logits/rejected': -0.506639301776886, 'epoch': 0.99}
|
||
|
||
99%|█████████▊| 335/340 [19:29<00:13, 2.64s/it]
|
||
99%|█████████▉| 336/340 [19:32<00:10, 2.59s/it]
|
||
99%|█████████▉| 337/340 [19:34<00:07, 2.57s/it]
|
||
99%|█████████▉| 338/340 [19:37<00:05, 2.56s/it]
|
||
100%|█████████▉| 339/340 [19:39<00:02, 2.48s/it]
|
||
100%|██████████| 340/340 [19:42<00:00, 2.52s/it]
|
||
|
||
{'loss': 0.3138, 'grad_norm': 19.59518051147461, 'learning_rate': 1.31753782067201e-11, 'margin_dpo/margin_mean': 18.689212799072266, 'margin_dpo/margin_std': 18.127058029174805, 'logps/chosen': -85.4710922241211, 'logps/rejected': -113.6316146850586, 'logps/ref_chosen': -64.36441802978516, 'logps/ref_rejected': -73.83573913574219, 'logits/chosen': -0.5786937475204468, 'logits/rejected': -0.544124186038971, 'epoch': 1.0}
|
||
|
||
100%|██████████| 340/340 [19:42<00:00, 2.52s/it][INFO|trainer.py:3984] 2026-04-10 17:55:36,220 >> Saving model checkpoint to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-340
|
||
[INFO|configuration_utils.py:419] 2026-04-10 17:55:36,227 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-340/config.json
|
||
[INFO|configuration_utils.py:911] 2026-04-10 17:55:36,231 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-340/generation_config.json
|
||
[INFO|modeling_utils.py:3580] 2026-04-10 17:56:15,466 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 6 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-340/model.safetensors.index.json.
|
||
[INFO|tokenization_utils_base.py:2510] 2026-04-10 17:56:15,471 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-340/tokenizer_config.json
|
||
[INFO|tokenization_utils_base.py:2519] 2026-04-10 17:56:15,474 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/checkpoint-340/special_tokens_map.json
|
||
[INFO|trainer.py:2681] 2026-04-10 17:59:29,929 >>
|
||
|
||
Training completed. Do not forget to share your model on huggingface.co/models =)
|
||
|
||
|
||
|
||
|
||
{'train_runtime': 1436.8705, 'train_samples_per_second': 30.342, 'train_steps_per_second': 0.237, 'train_loss': 0.4133688477908864, 'epoch': 1.0}
|
||
|
||
100%|██████████| 340/340 [23:51<00:00, 2.52s/it]
|
||
100%|██████████| 340/340 [23:51<00:00, 4.21s/it]
|
||
***** train metrics *****
|
||
epoch = 1.0
|
||
total_flos = 0GF
|
||
train_loss = 0.4134
|
||
train_runtime = 0:23:56.87
|
||
train_samples = 43598
|
||
train_samples_per_second = 30.342
|
||
train_steps_per_second = 0.237
|
||
2026-04-10 17:59:29 - INFO - __main__ - *** Training complete ***
|
||
2026-04-10 17:59:29 - INFO - __main__ - *** Save model ***
|
||
[INFO|configuration_utils.py:419] 2026-04-10 17:59:47,763 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/config.json
|
||
[INFO|configuration_utils.py:911] 2026-04-10 17:59:47,771 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/generation_config.json
|
||
[INFO|modeling_utils.py:3580] 2026-04-10 18:00:39,415 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 7 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/model.safetensors.index.json.
|
||
[INFO|tokenization_utils_base.py:2510] 2026-04-10 18:00:39,450 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/tokenizer_config.json
|
||
[INFO|tokenization_utils_base.py:2519] 2026-04-10 18:00:39,459 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/special_tokens_map.json
|
||
2026-04-10 18:00:39 - INFO - __main__ - Saved HF-compatible model artifacts to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009
|
||
[INFO|modelcard.py:450] 2026-04-10 18:00:39,763 >> Dropping the following result as it does not have all the necessary fields:
|
||
{'dataset': {'name': 'Anthropic/hh-rlhf', 'type': 'Anthropic/hh-rlhf'}}
|
||
[INFO|configuration_utils.py:419] 2026-04-10 18:00:39,776 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009/config.json
|
||
2026-04-10 18:00:39 - INFO - __main__ - *** Evaluate ***
|
||
[INFO|trainer.py:4307] 2026-04-10 18:00:39,777 >>
|
||
***** Running Evaluation *****
|
||
[INFO|trainer.py:4309] 2026-04-10 18:00:39,777 >> Num examples = 2339
|
||
[INFO|trainer.py:4312] 2026-04-10 18:00:39,777 >> Batch size = 16
|
||
|
||
0%| | 0/18 [00:00<?, ?it/s]
|
||
11%|█ | 2/18 [00:01<00:08, 1.92it/s]
|
||
17%|█▋ | 3/18 [00:02<00:11, 1.33it/s]
|
||
22%|██▏ | 4/18 [00:03<00:12, 1.13it/s]
|
||
28%|██▊ | 5/18 [00:04<00:13, 1.00s/it]
|
||
33%|███▎ | 6/18 [00:05<00:12, 1.04s/it]
|
||
39%|███▉ | 7/18 [00:06<00:11, 1.07s/it]
|
||
44%|████▍ | 8/18 [00:07<00:10, 1.07s/it]
|
||
50%|█████ | 9/18 [00:08<00:09, 1.08s/it]
|
||
56%|█████▌ | 10/18 [00:10<00:08, 1.12s/it]
|
||
61%|██████ | 11/18 [00:11<00:07, 1.09s/it]
|
||
67%|██████▋ | 12/18 [00:12<00:06, 1.14s/it]
|
||
72%|███████▏ | 13/18 [00:13<00:05, 1.11s/it]
|
||
78%|███████▊ | 14/18 [00:14<00:04, 1.15s/it]
|
||
83%|████████▎ | 15/18 [00:15<00:03, 1.15s/it]
|
||
89%|████████▉ | 16/18 [00:16<00:02, 1.09s/it]
|
||
94%|█████████▍| 17/18 [00:17<00:01, 1.12s/it]
|
||
100%|██████████| 18/18 [00:18<00:00, 1.06s/it]
|
||
100%|██████████| 18/18 [00:19<00:00, 1.06s/it]
|
||
***** eval metrics *****
|
||
epoch = 1.0
|
||
eval_logits/chosen = -0.5745
|
||
eval_logits/rejected = -0.5353
|
||
eval_logps/chosen = -119.713
|
||
eval_logps/ref_chosen = -97.0617
|
||
eval_logps/ref_rejected = -80.1818
|
||
eval_logps/rejected = -113.9829
|
||
eval_loss = 0.4583
|
||
eval_margin_dpo/margin_mean = 11.1499
|
||
eval_margin_dpo/margin_std = 15.0412
|
||
eval_runtime = 0:00:20.26
|
||
eval_samples = 2339
|
||
eval_samples_per_second = 115.397
|
||
eval_steps_per_second = 0.937
|
||
2026-04-10 18:01:00 - INFO - __main__ - *** Training complete! ***
|
||
wandb: - 0.015 MB of 0.015 MB uploaded
|
||
wandb: \ 0.015 MB of 0.015 MB uploaded
|
||
wandb: | 0.015 MB of 0.015 MB uploaded
|
||
wandb: / 0.015 MB of 0.015 MB uploaded
|
||
wandb: - 0.048 MB of 0.079 MB uploaded (0.002 MB deduped)
|
||
wandb: \ 0.051 MB of 0.080 MB uploaded (0.002 MB deduped)
|
||
wandb:
|
||
wandb: Run history:
|
||
wandb: eval/logits/chosen ▁▅▇█
|
||
wandb: eval/logits/rejected ▁▅▇█
|
||
wandb: eval/logps/chosen █▁▁▁
|
||
wandb: eval/logps/ref_chosen ▁▁▁▁
|
||
wandb: eval/logps/ref_rejected ▁▁▁▁
|
||
wandb: eval/logps/rejected █▁▁▁
|
||
wandb: eval/loss █▂▁▁
|
||
wandb: eval/margin_dpo/margin_mean ▁▇██
|
||
wandb: eval/margin_dpo/margin_std ▁▇██
|
||
wandb: eval/runtime █▃▃▁
|
||
wandb: eval/samples_per_second ▁▆▆█
|
||
wandb: eval/steps_per_second ▁▇▆█
|
||
wandb: train/epoch ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
|
||
wandb: train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
|
||
wandb: train/grad_norm ▅▅▇▅▄▂█▃▁▁▂▂▂▂▄▃▃▁▂▂▃▂▂▂▃▃▃▂▄▄▃▂▃▄▁▃▂▄▂▃
|
||
wandb: train/learning_rate ▁▂▄▆▇██████▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
|
||
wandb: train/logits/chosen █▇▆▇▄▂▂▂▂▁▂▂▃▁▂▁▁▁▂▃▂▄▂▄▅▅▄▃▅▃▄▄▄▃▆▄▇▃▄▅
|
||
wandb: train/logits/rejected █▆▆▆▄▃▂▂▂▁▂▃▃▁▂▁▁▁▂▃▂▃▃▄▅▆▃▃▆▄▅▅▄▄▅▄▇▄▅▅
|
||
wandb: train/logps/chosen ▆▆▇█▆▇▆▁▅▆▅▆▅▄▅▄▃▄▃▄▄▅▃▄▂▂▄▄▃▂▃▃▃▁▅▁▃▃▃▅
|
||
wandb: train/logps/ref_chosen ▆▆▇█▅▇▆▁▆▆▅▇▆▅▅▅▅▅▅▆▆█▅▆▄▅▇▇▆▄▅▅▅▃█▄▆▆▆▇
|
||
wandb: train/logps/ref_rejected ▆▆▆▆▄▄▄▃▄▄▃█▂▃▄▂▂▅▅▃▃▃▆▆▆▄▂▃▇▃▂▄▄▅▁▂▄▇▂▆
|
||
wandb: train/logps/rejected ████▇▆▅▅▆▅▅▇▄▄▄▃▃▄▄▃▃▂▄▄▄▂▁▂▄▃▁▃▂▃▁▁▂▅▁▃
|
||
wandb: train/loss ███▇▇▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▂▃▂▁▁▁▁▁▁▂▁▁
|
||
wandb: train/margin_dpo/margin_mean ▁▁▁▁▂▃▂▃▃▄▃▃▄▅▅▅▅▆▅▇▆▇▆▆▅▆▇▇▇▆█▇▇▇▆█▇▆█▇
|
||
wandb: train/margin_dpo/margin_std ▁▁▁▁▂▄▄▅▃▄▄▅▅▅▅▇▆▆▆▆▅▆▆▆▆██▆▇▆▇▆█▇▇▆▇▆█▇
|
||
wandb:
|
||
wandb: Run summary:
|
||
wandb: eval/logits/chosen -0.57455
|
||
wandb: eval/logits/rejected -0.53526
|
||
wandb: eval/logps/chosen -119.71296
|
||
wandb: eval/logps/ref_chosen -97.06174
|
||
wandb: eval/logps/ref_rejected -80.18183
|
||
wandb: eval/logps/rejected -113.98293
|
||
wandb: eval/loss 0.45825
|
||
wandb: eval/margin_dpo/margin_mean 11.14989
|
||
wandb: eval/margin_dpo/margin_std 15.04124
|
||
wandb: eval/runtime 20.2691
|
||
wandb: eval/samples_per_second 115.397
|
||
wandb: eval/steps_per_second 0.937
|
||
wandb: total_flos 0.0
|
||
wandb: train/epoch 1.0
|
||
wandb: train/global_step 340
|
||
wandb: train/grad_norm 19.59518
|
||
wandb: train/learning_rate 0.0
|
||
wandb: train/logits/chosen -0.57869
|
||
wandb: train/logits/rejected -0.54412
|
||
wandb: train/logps/chosen -85.47109
|
||
wandb: train/logps/ref_chosen -64.36442
|
||
wandb: train/logps/ref_rejected -73.83574
|
||
wandb: train/logps/rejected -113.63161
|
||
wandb: train/loss 0.3138
|
||
wandb: train/margin_dpo/margin_mean 18.68921
|
||
wandb: train/margin_dpo/margin_std 18.12706
|
||
wandb: train_loss 0.41337
|
||
wandb: train_runtime 1436.8705
|
||
wandb: train_samples_per_second 30.342
|
||
wandb: train_steps_per_second 0.237
|
||
wandb:
|
||
wandb: 🚀 View run llama-3-8b-base-margin-dpo-hh-helpful-8xh200-20260410-172009 at: https://wandb.ai/can-not-fand-northeastern-university/huggingface/runs/wep2te2x
|
||
wandb: ⭐️ View project at: https://wandb.ai/can-not-fand-northeastern-university/huggingface
|
||
wandb: Synced 6 W&B file(s), 0 media file(s), 2 artifact file(s) and 0 other file(s)
|
||
wandb: Find logs at: /scratch/feng.yulu/dynamic-dpo-v4/wandb/wandb/run-20260410_173535-wep2te2x/logs
|
||
wandb: WARNING The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require("core")`! See https://wandb.me/wandb-core for more information.
|