[W CUDAAllocatorConfig.h:28] Warning: expandable_segments not supported on this platform (function operator())
2026-04-10 21:56:48 - INFO - __main__ - Model parameters ModelArguments(base_model_revision=None, model_name_or_path='/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758', model_revision='main', model_code_revision=None, torch_dtype='bfloat16', tokenizer_name_or_path=None, trust_remote_code=False, attn_implementation='flash_attention_2', use_peft=False, lora_r=16, lora_alpha=32, lora_dropout=0.05, lora_target_modules=None, lora_modules_to_save=None, load_in_8bit=False, load_in_4bit=False, bnb_4bit_quant_type='nf4', use_bnb_nested_quant=False, bnb_4bit_quant_storage='uint8')
2026-04-10 21:56:48 - INFO - __main__ - Data parameters DataArguments(chat_template=None, dataset_mixer={'Anthropic/hh-rlhf': 1.0}, text_column='text', dataset_splits=['train', 'test'], dataset_configs=['helpful-base'], dataset_dir=None, preprocessing_num_workers=12, use_persistent_hf_cache=True, hf_cache_dir='/scratch/feng.yulu/dynamic-dpo-v4/hf/datasets', truncation_side=None, auto_insert_empty_system_msg=True, preprocessing_log_samples=0, preprocessing_log_dir=None)
2026-04-10 21:56:48 - INFO - __main__ - Training/evaluation parameters BetaDPOConfig(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
alpha=0.6,
auto_find_batch_size=False,
average_tokens_across_devices=False,
batch_eval_metrics=False,
beta=0.1,
beta_min=0.001,
bf16=True,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=True,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
dataset_num_proc=12,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
deterministic_eval=True,
disable_dropout=True,
disable_tqdm=False,
do_eval=True,
do_predict=False,
do_train=False,
ema_momentum=0.9,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=100,
eval_strategy=IntervalStrategy.STEPS,
eval_use_gather_object=False,
f_alpha_divergence_coef=1.0,
f_divergence_type=FDivergenceType.REVERSE_KL,
force_use_ref_model=False,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
generate_during_eval=False,
gradient_accumulation_steps=1,
gradient_checkpointing=True,
gradient_checkpointing_kwargs={'use_reentrant': False},
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_always_push=False,
hub_model_id=W-61/llama-3-8b-base-beta-dpo-hh-helpful-4xh200,
hub_model_revision=main,
hub_private_repo=None,
hub_strategy=HubStrategy.EVERY_SAVE,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_for_metrics=[],
include_inputs_for_metrics=False,
include_num_input_tokens_seen=False,
include_tokens_per_second=False,
is_encoder_decoder=None,
jit_mode_eval=False,
label_names=None,
label_pad_token_id=-100,
label_smoothing=0.0,
label_smoothing_factor=0.0,
learning_rate=5e-07,
length_column_name=length,
load_best_model_at_end=False,
local_rank=0,
log_level=info,
log_level_replica=warning,
log_on_each_node=True,
logging_dir=outputs/llama-3-8b-base-beta-dpo-hh-helpful-4xh200/runs/Apr10_21-56-46_d4054,
logging_first_step=True,
logging_nan_inf_filter=True,
logging_steps=5,
logging_strategy=IntervalStrategy.STEPS,
loss_type=sigmoid,
lr_scheduler_kwargs={},
lr_scheduler_type=SchedulerType.COSINE,
max_grad_norm=1.0,
max_length=512,
max_prompt_length=256,
max_steps=-1,
max_target_length=None,
metric_for_best_model=None,
model_adapter_name=None,
model_init_kwargs=None,
mp_parameters=,
neftune_noise_alpha=None,
no_cuda=False,
non_finite_logits_handling=sanitize,
num_train_epochs=1,
optim=OptimizerNames.ADAMW_TORCH,
optim_args=None,
optim_target_modules=None,
output_dir=/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627,
overwrite_output_dir=False,
padding_value=None,
past_index=-1,
per_device_eval_batch_size=16,
per_device_train_batch_size=16,
post_tokenization_log_dir=None,
post_tokenization_log_samples=0,
precompute_ref_batch_size=None,
precompute_ref_eval_batch_size=None,
precompute_ref_log_probs=False,
prediction_loss_only=False,
push_to_hub=False,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
ref_adapter_name=None,
ref_model_init_kwargs=None,
ref_model_mixup_alpha=0.9,
ref_model_sync_steps=64,
reference_free=False,
remove_unused_columns=False,
report_to=['wandb'],
require_equal_local_batch_size=True,
restore_callback_states_from_checkpoint=False,
resume_from_checkpoint=None,
reuse_tokenized_dataset=True,
rho=0.8,
rpo_alpha=None,
run_name=llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627,
save_on_each_node=False,
save_only_model=False,
save_safetensors=True,
save_steps=200,
save_strategy=SaveStrategy.STEPS,
save_total_limit=2,
seed=42,
sft_weight=0.0,
skip_memory_metrics=True,
sync_global_mask=True,
sync_ref_model=False,
tf32=None,
tokenization_batch_size=128,
tokenization_mode=online,
tokenized_dataset_cache_dir=/scratch/feng.yulu/dynamic-dpo-v4/tokenized_preferences,
torch_compile=False,
torch_compile_backend=None,
torch_compile_mode=None,
torch_empty_cache_steps=None,
torchdynamo=None,
tp_size=0,
tpu_metrics_debug=False,
tpu_num_cores=None,
trainer_type=beta_dpo,
truncation_mode=keep_end,
use_cpu=False,
use_ipex=False,
use_legacy_prediction_loop=False,
use_liger_kernel=False,
use_mps_device=False,
warmup_ratio=0.1,
warmup_steps=0,
weight_decay=0.0,
)
2026-04-10 21:56:48 - INFO - __main__ - Beta-DPO parameters: beta=0.1, rho=0.8, alpha=0.6, ema_momentum=0.9
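These four values drive the trainer's dynamic-beta logic, whose per-step effects show up later in the beta_dpo/* metrics (gap_mean, gap_std, beta_used_raw, beta_used, mask_keep_frac). The repo's BetaDPOTrainer is not shown in this log, so the sketch below is only one plausible reading of those quantities, loosely following the beta-DPO recipe (batch-level beta calibration plus gap-based filtering); every formula in it is an assumption, not this run's actual code.

    import torch

    beta0, beta_min, alpha, rho, momentum = 0.1, 0.001, 0.6, 0.8, 0.9
    ema_gap = 0.0  # running estimate of the chosen-vs-rejected reward gap

    def beta_dpo_step(chosen_logratios, rejected_logratios):
        # per-example margin between policy/reference log-ratios (assumed definition)
        global ema_gap
        gap = chosen_logratios - rejected_logratios
        gap_mean, gap_std = gap.mean(), gap.std()        # cf. beta_dpo/gap_mean, gap_std
        # filter outliers: keep examples close to the running gap estimate
        keep = (gap - ema_gap).abs() <= rho * gap_std    # cf. beta_dpo/mask_keep_frac
        ema_gap = momentum * ema_gap + (1 - momentum) * gap_mean.item()
        beta_raw = beta0 * (1 + alpha * ema_gap)         # cf. beta_dpo/beta_used_raw
        beta = max(beta_min, beta_raw)                   # cf. beta_dpo/beta_used, floored at beta_min
        return beta, keep

With sync_global_mask=True in the config above, the keep mask would presumably be agreed on across ranks before the loss is averaged.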
2026-04-10 21:56:48 - INFO - __main__ - Using persistent HF datasets cache at /scratch/feng.yulu/dynamic-dpo-v4/hf/datasets
2026-04-10 21:56:52 - WARNING - __main__ - Dropped 237 non-canonical HH preference examples from split `train` before normalization (126 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 111 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
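The two drop reasons (126 + 111 = 237 for train, and 9 + 6 = 15 for test below) suggest a pair of validity checks on each raw HH record. The actual predicates live in this repo's preprocessing code and are not shown in the log; the helper below is a hypothetical reconstruction using the raw HH turn markers.

    def is_canonical(ex):
        chosen, rejected = ex["chosen"], ex["rejected"]
        # locate the first character where the two transcripts diverge
        cut = next((i for i, (a, b) in enumerate(zip(chosen, rejected)) if a != b),
                   min(len(chosen), len(rejected)))
        c_sfx, r_sfx = chosen[cut:], rejected[cut:]
        # reason 2: the transcripts must actually diverge on an assistant response
        if c_sfx == r_sfx:
            return False
        # reason 1: the divergent suffix must be exactly one final assistant
        # response, i.e. contain no further turn markers
        markers = ("\n\nHuman:", "\n\nAssistant:")
        return not any(m in sfx for m in markers for sfx in (c_sfx, r_sfx))

    # applied with something like: raw = raw.filter(is_canonical)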
Normalizing raw HH preferences (train): 100%|██████████| 43598/43598 [00:03<00:00, 11250.93 examples/s]
2026-04-10 21:56:57 - WARNING - __main__ - Dropped 15 non-canonical HH preference examples from split `test` before normalization (9 x HH preprocessing expects exactly one final assistant response in chosen/rejected suffixes., 6 x HH chosen/rejected transcripts must each contain a divergent assistant response.).
Normalizing raw HH preferences (test): 100%|██████████| 2339/2339 [00:00<00:00, 10592.28 examples/s]
2026-04-10 21:56:57 - INFO - __main__ - Training on the following splits: ['train : 43598', 'test : 2339']
[INFO|tokenization_utils_base.py:2058] 2026-04-10 21:56:57,340 >> loading file tokenizer.json
[INFO|tokenization_utils_base.py:2058] 2026-04-10 21:56:57,340 >> loading file tokenizer.model
[INFO|tokenization_utils_base.py:2058] 2026-04-10 21:56:57,340 >> loading file added_tokens.json
[INFO|tokenization_utils_base.py:2058] 2026-04-10 21:56:57,340 >> loading file special_tokens_map.json
[INFO|tokenization_utils_base.py:2058] 2026-04-10 21:56:57,340 >> loading file tokenizer_config.json
[INFO|tokenization_utils_base.py:2058] 2026-04-10 21:56:57,340 >> loading file chat_template.jinja
[INFO|tokenization_utils_base.py:2323] 2026-04-10 21:56:57,723 >> Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
2026-04-10 21:56:58 - INFO - __main__ - Processed train sample 41905:
Prompt:
<|begin_of_text|><|start_header_id|>user<|end_header_id|>

What is the best way to clean my refrigerator?<|eot_id|><|start_header_id|>assistant<|end_header_id|>

Well, I have a few ideas, but the best way to know what works best is to run some tests and then analyze the data to figure out which technique works best.<|eot_id|><|start_header_id|>user<|end_header_id|>

What are your ideas?<|eot_id|>

Chosen:
<|start_header_id|>assistant<|end_header_id|>

Some of my ideas include wiping down the interior with a rag, using bleach and water to wipe down the door, emptying the contents of the fridge to scrub the shelves with a clean cloth, cleaning the interior of the fridge with vinegar and water, and using a vacuum cleaner to clean the interior.<|eot_id|>

Rejected:
<|start_header_id|>assistant<|end_header_id|>

I was thinking that you could try different types of soap, which could be a variation of “soap,” like antibacterial soap. I was also thinking that you could try using different tools, like a toothbrush, to clean the inside. Or I was thinking that you could try different ways to get the gunk out, like using a squeegee, or using a paper towel. I was also thinking you could try other types of cleaning, like vacuuming, but I think that could have the opposite of the desired effect.<|eot_id|>
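The sample shows the Llama-3 chat template applied to an HH dialogue: the prompt carries the shared turns and ends at the last user <|eot_id|>, while chosen and rejected each hold only the final assistant turn. A sketch of how such a pair can be rendered with the tokenizer loaded above (the repo's actual helper may differ; the literals are abbreviated stand-ins):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained(
        "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758")
    prompt_msgs = [
        {"role": "user", "content": "What is the best way to clean my refrigerator?"},
        {"role": "assistant", "content": "Well, I have a few ideas, ..."},
        {"role": "user", "content": "What are your ideas?"},
    ]
    chosen_text = "Some of my ideas include wiping down the interior with a rag, ..."
    prompt = tok.apply_chat_template(prompt_msgs, tokenize=False)
    # render the full transcript, then strip the shared prompt prefix so the
    # completion starts at the assistant header, as in the sample above
    full = tok.apply_chat_template(
        prompt_msgs + [{"role": "assistant", "content": chosen_text}], tokenize=False)
    chosen = full[len(prompt):]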
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:391: UserWarning: You passed a model_id to the trainer. This will automatically create an `AutoModelForCausalLM` or a `PeftModel` (if you passed a `peft_config`) for you.
  warnings.warn(
[INFO|configuration_utils.py:691] 2026-04-10 21:56:58,031 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/config.json
[INFO|configuration_utils.py:765] 2026-04-10 21:56:58,032 >> Model config LlamaConfig {
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "head_dim": 128,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-05,
  "rope_scaling": null,
  "rope_theta": 500000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.51.0",
  "use_cache": false,
  "vocab_size": 128256
}
[INFO|modeling_utils.py:1121] 2026-04-10 21:56:58,042 >> loading weights file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/model.safetensors.index.json
[INFO|modeling_utils.py:2167] 2026-04-10 21:56:58,043 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
[WARNING|logging.py:328] 2026-04-10 21:56:58,044 >> You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
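This warning is expected with the accelerate/FSDP launch used here: the model is instantiated on CPU first and only moved to the GPUs when the trainer wraps it, so no action is needed. The load that triggers it corresponds to the ModelArguments above, roughly (a sketch; the repo's loader may pass more options):

    import torch
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained(
        "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758",
        torch_dtype=torch.bfloat16,               # torch_dtype='bfloat16' above
        attn_implementation="flash_attention_2",  # attn_implementation above
    )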
[INFO|configuration_utils.py:1142] 2026-04-10 21:56:58,045 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "use_cache": false
}
[WARNING|trainer.py:821] 2026-04-10 21:56:58,486 >> Trainer.tokenizer is now deprecated. You should use `Trainer.processing_class = processing_class` instead.
Loading checkpoint shards: 100%|██████████| 7/7 [00:08<00:00, 1.24s/it]
[INFO|modeling_utils.py:4926] 2026-04-10 21:57:06,758 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
[INFO|modeling_utils.py:4934] 2026-04-10 21:57:06,758 >> All the weights of LlamaForCausalLM were initialized from the model checkpoint at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
[INFO|configuration_utils.py:1095] 2026-04-10 21:57:06,760 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/generation_config.json
[INFO|configuration_utils.py:1142] 2026-04-10 21:57:06,761 >> Generate config GenerationConfig {
  "bos_token_id": 128000,
  "do_sample": true,
  "eos_token_id": 128001,
  "max_length": 4096,
  "temperature": 0.6,
  "top_p": 0.9
}
[INFO|configuration_utils.py:691] 2026-04-10 21:57:06,762 >> loading configuration file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/config.json
[INFO|modeling_utils.py:1121] 2026-04-10 21:57:06,763 >> loading weights file /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-sft-hh-helpful-8xh200-20260410-133758/model.safetensors.index.json
[INFO|modeling_utils.py:2167] 2026-04-10 21:57:06,764 >> Instantiating LlamaForCausalLM model under default dtype torch.bfloat16.
Loading checkpoint shards: 100%|██████████| 7/7 [00:08<00:00, 1.19s/it]
[INFO|modeling_utils.py:4926] 2026-04-10 21:57:15,285 >> All model checkpoint weights were used when initializing LlamaForCausalLM.
Tokenizing train (num_proc=12): 0%| | 0/43598 [00:00<?, ? examples/s]
Tokenizing train (num_proc=12): 13%|█▎ | 5554/43598 [00:57<00:46, 811.32 examples/s]
Traceback (most recent call last):
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
|
|||
|
|
self.run()
|
|||
|
|
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
|
|||
|
|
self._target(*self._args, **self._kwargs)
|
|||
|
|
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
|
|||
|
|
server.serve_forever()
|
|||
|
|
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
|
|||
|
|
sys.exit(0)
|
|||
|
|
SystemExit: 0
|
|||
|
|
|
|||
|
|
During handling of the above exception, another exception occurred:
|
|||
|
|
|
|||
|
|
Traceback (most recent call last):
|
|||
|
|
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
|
|||
|
|
finalizer()
|
|||
|
|
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
|
|||
|
|
res = self._callback(*self._args, **self._kwargs)
|
|||
|
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
|||
|
|
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
|
|||
|
|
rmtree(tempdir)
|
|||
|
|
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
|
|||
|
|
_rmtree_safe_fd(fd, path, onerror)
|
|||
|
|
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
|
|||
|
|
onerror(os.unlink, fullname, sys.exc_info())
|
|||
|
|
File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
|
|||
|
|
os.unlink(entry.name, dir_fd=topfd)
|
|||
|
|
OSError: [Errno 16] Device or resource busy: '.nfs382c99819e51fe0700001de9'
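This OSError is an NFS artifact rather than a training failure: deleting a file that another process still holds open on NFS leaves a .nfsXXXX placeholder behind, so the multiprocess finalizer's rmtree of its temp directory fails. Tokenization still completes below. A hypothetical mitigation (not applied in this run) is to point temp directories at node-local storage before the num_proc=12 map starts, assuming /tmp is local:

    import os, tempfile

    os.environ["TMPDIR"] = "/tmp"   # assumption: /tmp is node-local, not NFS
    tempfile.tempdir = None         # make tempfile re-read TMPDIR on next use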
Tokenizing train (num_proc=12): 100%|██████████| 43598/43598 [04:51<00:00, 149.56 examples/s]
Saving the dataset (2/2 shards): 100%|██████████| 43598/43598 [00:01<00:00, 32158.30 examples/s]
Tokenizing test (num_proc=12): 0%| | 0/2339 [00:00<?, ? examples/s]
Tokenizing test (num_proc=12): 97%|█████████▋| 2273/2339 [06:33<00:11, 5.99 examples/s]
Traceback (most recent call last):
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 314, in _bootstrap
    self.run()
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 600, in _run_server
    server.serve_forever()
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/managers.py", line 184, in serve_forever
    sys.exit(0)
SystemExit: 0

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 300, in _run_finalizers
    finalizer()
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/multiprocess/util.py", line 133, in _remove_temp_dir
    rmtree(tempdir)
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 752, in rmtree
    _rmtree_safe_fd(fd, path, onerror)
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 703, in _rmtree_safe_fd
    onerror(os.unlink, fullname, sys.exc_info())
  File "/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/shutil.py", line 701, in _rmtree_safe_fd
    os.unlink(entry.name, dir_fd=topfd)
OSError: [Errno 16] Device or resource busy: '.nfs89d4f1995f55a4d300001dea'
Tokenizing test (num_proc=12): 100%|██████████| 2339/2339 [06:33<00:00, 5.95 examples/s]
Saving the dataset (1/1 shards): 100%|██████████| 2339/2339 [00:00<00:00, 27605.13 examples/s]
/home/feng.yulu/dynamic-dpo-v4/scripts/tokenized_dpo_trainer.py:518: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `BetaDPOTrainer.__init__`. Use `processing_class` instead.
super().__init__(
[INFO|trainer.py:748] 2026-04-10 22:10:11,062 >> Using auto half precision backend
/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaForCausalLM because mixed precision turned on in FSDP. Affects: model.embed_tokens.weight, model.norm.weight, lm_head.weight.
  warnings.warn(
/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1557: UserWarning: Upcasted low precision parameters in LlamaDecoderLayer because mixed precision turned on in FSDP. Affects: self_attn.q_proj.weight, self_attn.k_proj.weight, self_attn.v_proj.weight, self_attn.o_proj.weight, mlp.gate_proj.weight, mlp.up_proj.weight, mlp.down_proj.weight, input_layernorm.weight, post_attention_layernorm.weight.
  warnings.warn(
/home/feng.yulu/.conda/envs/dpo_venv/lib/python3.11/site-packages/accelerate/accelerator.py:1563: UserWarning: FSDP upcast of low precision parameters may affect the precision of model checkpoints.
  warnings.warn(
[INFO|trainer.py:2414] 2026-04-10 22:10:15,720 >> ***** Running training *****
[INFO|trainer.py:2415] 2026-04-10 22:10:15,720 >> Num examples = 43,598
[INFO|trainer.py:2416] 2026-04-10 22:10:15,720 >> Num Epochs = 1
[INFO|trainer.py:2417] 2026-04-10 22:10:15,720 >> Instantaneous batch size per device = 16
[INFO|trainer.py:2420] 2026-04-10 22:10:15,720 >> Total train batch size (w. parallel, distributed & accumulation) = 128
[INFO|trainer.py:2421] 2026-04-10 22:10:15,720 >> Gradient Accumulation steps = 1
[INFO|trainer.py:2422] 2026-04-10 22:10:15,720 >> Total optimization steps = 340
[INFO|trainer.py:2423] 2026-04-10 22:10:15,721 >> Number of trainable parameters = 1,003,782,656
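These numbers are internally consistent: 16 per device x 8 GPUs x 1 accumulation step = 128, and with dataloader_drop_last=True the 43,598 pairs give floor(43598/128) = 340 optimization steps. The parameter count is the per-rank view under FSDP full sharding, assuming the usual 8,030,261,248-parameter Llama-3-8B: 8,030,261,248 / 8 = 1,003,782,656. A quick check (plain arithmetic, not from the log):

    examples, per_device, gpus, grad_accum = 43_598, 16, 8, 1
    assert per_device * gpus * grad_accum == 128       # total train batch size
    assert examples // 128 == 340                      # total optimization steps
    assert 8_030_261_248 // gpus == 1_003_782_656      # FSDP shard per rank (assumed full count)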
[INFO|integration_utils.py:831] 2026-04-10 22:10:15,722 >> Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
wandb: Currently logged in as: can-not-fand (can-not-fand-northeastern-university). Use `wandb login --relogin` to force relogin
wandb: wandb version 0.25.1 is available! To upgrade, please run:
wandb: $ pip install wandb --upgrade
wandb: Tracking run with wandb version 0.17.5
wandb: Run data is saved locally in /scratch/feng.yulu/dynamic-dpo-v4/wandb/wandb/run-20260410_221018-54i2is22
wandb: Run `wandb offline` to turn off syncing.
wandb: Syncing run llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627
wandb: ⭐️ View project at https://wandb.ai/can-not-fand-northeastern-university/huggingface
wandb: 🚀 View run at https://wandb.ai/can-not-fand-northeastern-university/huggingface/runs/54i2is22
0%| | 0/340 [00:00<?, ?it/s][WARNING|modeling_utils.py:1713] 2026-04-10 22:10:25,250 >> Could not estimate the number of tokens of the input, floating-point operations will not be computed
0%| | 1/340 [00:03<18:23, 3.26s/it]
{'loss': 0.6919, 'grad_norm': 23.302410125732422, 'learning_rate': 0.0, 'beta_dpo/gap_mean': -0.0009442940354347229, 'beta_dpo/gap_std': 0.03691839799284935, 'beta_dpo/beta_used_raw': 0.10121209919452667, 'beta_dpo/beta_used': 0.10121209919452667, 'beta_dpo/mask_keep_frac': 0.9375, 'logits/chosen': -0.4739703834056854, 'logits/rejected': -0.44689586758613586, 'epoch': 0.0}
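The beta_dpo/* keys are this trainer's dynamic-β telemetry: gap_mean/gap_std summarize the per-pair implicit reward gap, beta_used_raw is the batch-calibrated β before safeguards, beta_used is the value actually applied, and mask_keep_frac is the fraction of pairs kept after outlier filtering. The exact update rule is not in the log; below is a minimal sketch in the spirit of the β-DPO dynamic-β recipe, wired to the configured beta=0.1, alpha=0.6, beta_min=0.001 and ema_momentum=0.9 (all of it an assumption, not the run's actual code):

    import torch

    beta0, alpha, beta_min, momentum = 0.1, 0.6, 0.001, 0.9
    m0 = 0.0  # running (EMA) mean of the reward gap

    def step_beta(gap: torch.Tensor):
        # gap: per-pair implicit reward gap for one batch (1-D tensor).
        global m0
        # Outlier filtering: drop pairs far from the running mean; the kept
        # fraction is what beta_dpo/mask_keep_frac logs (threshold is a guess).
        keep = (gap - m0).abs() <= 2.0 * gap.std()
        if not keep.any():
            keep = torch.ones_like(keep)
        batch_gap = gap[keep].mean()

        beta_raw = beta0 * (1.0 + alpha * (batch_gap - m0))  # beta_used_raw
        beta = torch.clamp(beta_raw, min=beta_min)           # beta_used (floored)

        m0 = momentum * m0 + (1.0 - momentum) * batch_gap.item()
        return beta, keep

The floor is visible later in this log: at step 320 beta_used bottoms out at exactly 0.001 while beta_used_raw is negative.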
1%| | 2/340 [00:06<17:05, 3.04s/it]
1%| | 3/340 [00:08<16:08, 2.87s/it]
1%| | 4/340 [00:11<14:59, 2.68s/it]
1%|▏ | 5/340 [00:13<14:50, 2.66s/it]
{'loss': 0.693, 'grad_norm': 24.834075927734375, 'learning_rate': 5.88235294117647e-08, 'beta_dpo/gap_mean': -0.0016960372449830174, 'beta_dpo/gap_std': 0.1151522547006607, 'beta_dpo/beta_used_raw': 0.10032124072313309, 'beta_dpo/beta_used': 0.10032124072313309, 'beta_dpo/mask_keep_frac': 0.765625, 'logits/chosen': -0.49943581223487854, 'logits/rejected': -0.4934660494327545, 'epoch': 0.01}
2%|▏ | 6/340 [00:16<14:46, 2.65s/it]
2%|▏ | 7/340 [00:19<14:40, 2.65s/it]
2%|▏ | 8/340 [00:21<14:25, 2.61s/it]
3%|▎ | 9/340 [00:24<14:24, 2.61s/it]
3%|▎ | 10/340 [00:26<14:21, 2.61s/it]
{'loss': 0.692, 'grad_norm': 21.942047119140625, 'learning_rate': 1.3235294117647057e-07, 'beta_dpo/gap_mean': 0.0030363830737769604, 'beta_dpo/gap_std': 0.2163175642490387, 'beta_dpo/beta_used_raw': 0.101251520216465, 'beta_dpo/beta_used': 0.101251520216465, 'beta_dpo/mask_keep_frac': 0.824999988079071, 'logits/chosen': -0.5174359083175659, 'logits/rejected': -0.5005401968955994, 'epoch': 0.03}
3%|▎ | 11/340 [00:29<14:23, 2.63s/it]
4%|▎ | 12/340 [00:32<14:20, 2.62s/it]
4%|▍ | 13/340 [00:34<14:21, 2.63s/it]
4%|▍ | 14/340 [00:37<14:12, 2.61s/it]
4%|▍ | 15/340 [00:39<14:06, 2.61s/it]
{'loss': 0.6911, 'grad_norm': 28.207460403442383, 'learning_rate': 2.0588235294117645e-07, 'beta_dpo/gap_mean': 0.024518460035324097, 'beta_dpo/gap_std': 0.2784799039363861, 'beta_dpo/beta_used_raw': 0.10108586400747299, 'beta_dpo/beta_used': 0.10108586400747299, 'beta_dpo/mask_keep_frac': 0.824999988079071, 'logits/chosen': -0.5348216891288757, 'logits/rejected': -0.5156930088996887, 'epoch': 0.04}
5%|▍ | 16/340 [00:42<14:09, 2.62s/it]
5%|▌ | 17/340 [00:45<13:58, 2.60s/it]
5%|▌ | 18/340 [00:47<13:51, 2.58s/it]
6%|▌ | 19/340 [00:50<13:43, 2.56s/it]
6%|▌ | 20/340 [00:52<13:41, 2.57s/it]
{'loss': 0.6874, 'grad_norm': 19.902040481567383, 'learning_rate': 2.7941176470588235e-07, 'beta_dpo/gap_mean': 0.0749056339263916, 'beta_dpo/gap_std': 0.33879655599594116, 'beta_dpo/beta_used_raw': 0.10244777053594589, 'beta_dpo/beta_used': 0.10244777053594589, 'beta_dpo/mask_keep_frac': 0.8125, 'logits/chosen': -0.5660465955734253, 'logits/rejected': -0.5419166088104248, 'epoch': 0.06}
6%|▌ | 21/340 [00:55<13:54, 2.62s/it]
6%|▋ | 22/340 [00:58<13:53, 2.62s/it]
7%|▋ | 23/340 [01:00<13:50, 2.62s/it]
7%|▋ | 24/340 [01:03<14:03, 2.67s/it]
7%|▋ | 25/340 [01:06<13:51, 2.64s/it]
{'loss': 0.6769, 'grad_norm': 22.522640228271484, 'learning_rate': 3.529411764705882e-07, 'beta_dpo/gap_mean': 0.20916345715522766, 'beta_dpo/gap_std': 0.456662118434906, 'beta_dpo/beta_used_raw': 0.10622622072696686, 'beta_dpo/beta_used': 0.10622622072696686, 'beta_dpo/mask_keep_frac': 0.887499988079071, 'logits/chosen': -0.5110368132591248, 'logits/rejected': -0.5050845146179199, 'epoch': 0.07}
8%|▊ | 26/340 [01:08<13:40, 2.61s/it]
8%|▊ | 27/340 [01:11<13:21, 2.56s/it]
8%|▊ | 28/340 [01:13<13:22, 2.57s/it]
9%|▊ | 29/340 [01:16<13:24, 2.59s/it]
9%|▉ | 30/340 [01:18<13:25, 2.60s/it]
{'loss': 0.6574, 'grad_norm': 19.37394142150879, 'learning_rate': 4.264705882352941e-07, 'beta_dpo/gap_mean': 0.5209842920303345, 'beta_dpo/gap_std': 0.7702666521072388, 'beta_dpo/beta_used_raw': 0.10997174680233002, 'beta_dpo/beta_used': 0.10997174680233002, 'beta_dpo/mask_keep_frac': 0.762499988079071, 'logits/chosen': -0.5535926222801208, 'logits/rejected': -0.5316442251205444, 'epoch': 0.09}
9%|▉ | 31/340 [01:21<13:23, 2.60s/it]
9%|▉ | 32/340 [01:24<13:26, 2.62s/it]
10%|▉ | 33/340 [01:26<13:22, 2.61s/it]
10%|█ | 34/340 [01:29<13:05, 2.57s/it]
10%|█ | 35/340 [01:31<13:08, 2.58s/it]
{'loss': 0.6265, 'grad_norm': 23.465280532836914, 'learning_rate': 5e-07, 'beta_dpo/gap_mean': 0.9489548802375793, 'beta_dpo/gap_std': 1.3326656818389893, 'beta_dpo/beta_used_raw': 0.11611036211252213, 'beta_dpo/beta_used': 0.11611036211252213, 'beta_dpo/mask_keep_frac': 0.6625000238418579, 'logits/chosen': -0.5605362057685852, 'logits/rejected': -0.5497816801071167, 'epoch': 0.1}
11%|█ | 36/340 [01:34<13:07, 2.59s/it]
11%|█ | 37/340 [01:37<13:02, 2.58s/it]
11%|█ | 38/340 [01:39<12:58, 2.58s/it]
11%|█▏ | 39/340 [01:42<12:53, 2.57s/it]
12%|█▏ | 40/340 [01:44<13:00, 2.60s/it]
{'loss': 0.5663, 'grad_norm': 20.867738723754883, 'learning_rate': 4.996706849759452e-07, 'beta_dpo/gap_mean': 1.789758324623108, 'beta_dpo/gap_std': 2.447655200958252, 'beta_dpo/beta_used_raw': 0.1326821744441986, 'beta_dpo/beta_used': 0.1326821744441986, 'beta_dpo/mask_keep_frac': 0.7749999761581421, 'logits/chosen': -0.6393685340881348, 'logits/rejected': -0.6073721051216125, 'epoch': 0.12}
12%|█▏ | 41/340 [01:47<13:02, 2.62s/it]
12%|█▏ | 42/340 [01:50<12:57, 2.61s/it]
13%|█▎ | 43/340 [01:52<12:52, 2.60s/it]
13%|█▎ | 44/340 [01:55<12:41, 2.57s/it]
13%|█▎ | 45/340 [01:57<12:32, 2.55s/it]
{'loss': 0.5411, 'grad_norm': 19.109943389892578, 'learning_rate': 4.986836074908615e-07, 'beta_dpo/gap_mean': 2.8082375526428223, 'beta_dpo/gap_std': 4.084892272949219, 'beta_dpo/beta_used_raw': 0.12091531604528427, 'beta_dpo/beta_used': 0.12091531604528427, 'beta_dpo/mask_keep_frac': 0.7875000238418579, 'logits/chosen': -0.6684064865112305, 'logits/rejected': -0.6361075639724731, 'epoch': 0.13}
14%|█▎ | 46/340 [02:00<12:39, 2.58s/it]
14%|█▍ | 47/340 [02:03<12:56, 2.65s/it]
14%|█▍ | 48/340 [02:05<12:49, 2.63s/it]
14%|█▍ | 49/340 [02:08<12:39, 2.61s/it]
15%|█▍ | 50/340 [02:10<12:25, 2.57s/it]
{'loss': 0.536, 'grad_norm': 22.54947853088379, 'learning_rate': 4.970413680203148e-07, 'beta_dpo/gap_mean': 3.5657267570495605, 'beta_dpo/gap_std': 5.662721633911133, 'beta_dpo/beta_used_raw': 0.11514081805944443, 'beta_dpo/beta_used': 0.11514081805944443, 'beta_dpo/mask_keep_frac': 0.75, 'logits/chosen': -0.6647295951843262, 'logits/rejected': -0.6276803612709045, 'epoch': 0.15}
15%|█▌ | 51/340 [02:13<12:23, 2.57s/it]
15%|█▌ | 52/340 [02:15<12:07, 2.53s/it]
16%|█▌ | 53/340 [02:18<12:07, 2.53s/it]
16%|█▌ | 54/340 [02:20<12:12, 2.56s/it]
16%|█▌ | 55/340 [02:23<12:11, 2.57s/it]
{'loss': 0.493, 'grad_norm': 38.691123962402344, 'learning_rate': 4.947482930773511e-07, 'beta_dpo/gap_mean': 4.35926628112793, 'beta_dpo/gap_std': 7.092940330505371, 'beta_dpo/beta_used_raw': 0.13137957453727722, 'beta_dpo/beta_used': 0.13137957453727722, 'beta_dpo/mask_keep_frac': 0.7250000238418579, 'logits/chosen': -0.7010586261749268, 'logits/rejected': -0.675391435623169, 'epoch': 0.16}
16%|█▋ | 56/340 [02:26<12:13, 2.58s/it]
17%|█▋ | 57/340 [02:28<12:16, 2.60s/it]
17%|█▋ | 58/340 [02:31<12:14, 2.60s/it]
17%|█▋ | 59/340 [02:34<12:11, 2.60s/it]
18%|█▊ | 60/340 [02:36<12:12, 2.62s/it]
{'loss': 0.5315, 'grad_norm': 31.127901077270508, 'learning_rate': 4.918104238142103e-07, 'beta_dpo/gap_mean': 5.047989845275879, 'beta_dpo/gap_std': 8.23731803894043, 'beta_dpo/beta_used_raw': 0.094205841422081, 'beta_dpo/beta_used': 0.094205841422081, 'beta_dpo/mask_keep_frac': 0.7250000238418579, 'logits/chosen': -0.724422812461853, 'logits/rejected': -0.6809322237968445, 'epoch': 0.18}
18%|█▊ | 61/340 [02:39<11:56, 2.57s/it]
18%|█▊ | 62/340 [02:41<11:59, 2.59s/it]
19%|█▊ | 63/340 [02:44<12:00, 2.60s/it]
19%|█▉ | 64/340 [02:46<11:57, 2.60s/it]
19%|█▉ | 65/340 [02:49<11:50, 2.58s/it]
{'loss': 0.4741, 'grad_norm': 20.432043075561523, 'learning_rate': 4.882355001067891e-07, 'beta_dpo/gap_mean': 5.827352523803711, 'beta_dpo/gap_std': 8.861337661743164, 'beta_dpo/beta_used_raw': 0.11677428334951401, 'beta_dpo/beta_used': 0.11677428334951401, 'beta_dpo/mask_keep_frac': 0.8500000238418579, 'logits/chosen': -0.6648474931716919, 'logits/rejected': -0.637535572052002, 'epoch': 0.19}
19%|█▉ | 66/340 [02:51<11:37, 2.54s/it]
20%|█▉ | 67/340 [02:54<11:34, 2.54s/it]
20%|██ | 68/340 [02:57<11:35, 2.56s/it]
20%|██ | 69/340 [02:59<11:23, 2.52s/it]
21%|██ | 70/340 [03:02<11:32, 2.57s/it]
{'loss': 0.5026, 'grad_norm': 24.550621032714844, 'learning_rate': 4.840329401637809e-07, 'beta_dpo/gap_mean': 6.462141990661621, 'beta_dpo/gap_std': 9.157753944396973, 'beta_dpo/beta_used_raw': 0.09036926180124283, 'beta_dpo/beta_used': 0.09036926180124283, 'beta_dpo/mask_keep_frac': 0.7875000238418579, 'logits/chosen': -0.6986874341964722, 'logits/rejected': -0.6637295484542847, 'epoch': 0.21}
21%|██ | 71/340 [03:04<11:32, 2.58s/it]
21%|██ | 72/340 [03:07<11:45, 2.63s/it]
21%|██▏ | 73/340 [03:10<11:38, 2.61s/it]
22%|██▏ | 74/340 [03:12<11:31, 2.60s/it]
22%|██▏ | 75/340 [03:15<11:27, 2.59s/it]
{'loss': 0.5172, 'grad_norm': 21.727449417114258, 'learning_rate': 4.792138157142157e-07, 'beta_dpo/gap_mean': 6.905457496643066, 'beta_dpo/gap_std': 9.706171035766602, 'beta_dpo/beta_used_raw': 0.0741354450583458, 'beta_dpo/beta_used': 0.07552285492420197, 'beta_dpo/mask_keep_frac': 0.8125, 'logits/chosen': -0.7469242215156555, 'logits/rejected': -0.7223338484764099, 'epoch': 0.22}
22%|██▏ | 76/340 [03:17<11:26, 2.60s/it]
23%|██▎ | 77/340 [03:20<11:24, 2.60s/it]
23%|██▎ | 78/340 [03:23<11:18, 2.59s/it]
23%|██▎ | 79/340 [03:25<11:14, 2.59s/it]
24%|██▎ | 80/340 [03:28<11:08, 2.57s/it]
{'loss': 0.4756, 'grad_norm': 0.3042762279510498, 'learning_rate': 4.737908228387656e-07, 'beta_dpo/gap_mean': 7.501389980316162, 'beta_dpo/gap_std': 10.180580139160156, 'beta_dpo/beta_used_raw': 0.09059171378612518, 'beta_dpo/beta_used': 0.09862758219242096, 'beta_dpo/mask_keep_frac': 0.8374999761581421, 'logits/chosen': -0.7543509602546692, 'logits/rejected': -0.7017374038696289, 'epoch': 0.24}
24%|██▍ | 81/340 [03:30<11:09, 2.59s/it]
24%|██▍ | 82/340 [03:33<10:57, 2.55s/it]
24%|██▍ | 83/340 [03:35<10:47, 2.52s/it]
25%|██▍ | 84/340 [03:38<10:51, 2.54s/it]
25%|██▌ | 85/340 [03:40<10:56, 2.58s/it]
{'loss': 0.5345, 'grad_norm': 0.3057352602481842, 'learning_rate': 4.6777824852166437e-07, 'beta_dpo/gap_mean': 7.9440507888793945, 'beta_dpo/gap_std': 10.780364990234375, 'beta_dpo/beta_used_raw': 0.05231575295329094, 'beta_dpo/beta_used': 0.06848205626010895, 'beta_dpo/mask_keep_frac': 0.800000011920929, 'logits/chosen': -0.7203555107116699, 'logits/rejected': -0.6912198066711426, 'epoch': 0.25}
25%|██▌ | 86/340 [03:43<10:53, 2.57s/it]
26%|██▌ | 87/340 [03:46<10:47, 2.56s/it]
26%|██▌ | 88/340 [03:48<10:43, 2.55s/it]
26%|██▌ | 89/340 [03:51<10:44, 2.57s/it]
26%|██▋ | 90/340 [03:53<10:42, 2.57s/it]
{'loss': 0.4944, 'grad_norm': 21.29926300048828, 'learning_rate': 4.611919330113591e-07, 'beta_dpo/gap_mean': 8.4508638381958, 'beta_dpo/gap_std': 11.448507308959961, 'beta_dpo/beta_used_raw': 0.08253253251314163, 'beta_dpo/beta_used': 0.08253253251314163, 'beta_dpo/mask_keep_frac': 0.875, 'logits/chosen': -0.6781951189041138, 'logits/rejected': -0.6568866968154907, 'epoch': 0.26}
27%|██▋ | 91/340 [03:56<10:42, 2.58s/it]
27%|██▋ | 92/340 [03:59<10:51, 2.63s/it]
27%|██▋ | 93/340 [04:01<10:45, 2.61s/it]
28%|██▊ | 94/340 [04:04<10:36, 2.59s/it]
28%|██▊ | 95/340 [04:06<10:33, 2.59s/it]
{'loss': 0.5335, 'grad_norm': 15.502776145935059, 'learning_rate': 4.5404922808905543e-07, 'beta_dpo/gap_mean': 8.875980377197266, 'beta_dpo/gap_std': 12.020231246948242, 'beta_dpo/beta_used_raw': 0.05684714391827583, 'beta_dpo/beta_used': 0.05684714391827583, 'beta_dpo/mask_keep_frac': 0.7875000238418579, 'logits/chosen': -0.7086650729179382, 'logits/rejected': -0.6651682257652283, 'epoch': 0.28}
28%|██▊ | 96/340 [04:09<10:42, 2.63s/it]
29%|██▊ | 97/340 [04:12<10:26, 2.58s/it]
29%|██▉ | 98/340 [04:14<10:27, 2.59s/it]
29%|██▉ | 99/340 [04:17<10:23, 2.59s/it]
29%|██▉ | 100/340 [04:19<10:22, 2.60s/it]
{'loss': 0.4098, 'grad_norm': 34.129478454589844, 'learning_rate': 4.4636895135509966e-07, 'beta_dpo/gap_mean': 9.62360954284668, 'beta_dpo/gap_std': 12.684171676635742, 'beta_dpo/beta_used_raw': 0.1179933100938797, 'beta_dpo/beta_used': 0.1179933100938797, 'beta_dpo/mask_keep_frac': 0.762499988079071, 'logits/chosen': -0.6843082904815674, 'logits/rejected': -0.660758912563324, 'epoch': 0.29}
29%|██▉ | 100/340 [04:19<10:22, 2.60s/it][INFO|trainer.py:4307] 2026-04-10 22:14:41,910 >>
***** Running Evaluation *****
[INFO|trainer.py:4309] 2026-04-10 22:14:41,910 >> Num examples = 2339
[INFO|trainer.py:4312] 2026-04-10 22:14:41,910 >> Batch size = 16
100%|██████████| 18/18 [00:19<00:00, 1.12s/it]
{'eval_loss': 0.6251118183135986, 'eval_runtime': 20.4115, 'eval_samples_per_second': 114.592, 'eval_steps_per_second': 0.931, 'eval_beta_dpo/gap_mean': 7.997772216796875, 'eval_beta_dpo/gap_std': 13.260690689086914, 'eval_beta_dpo/beta_used_raw': 0.01594320312142372, 'eval_beta_dpo/beta_used': 0.04330332204699516, 'eval_beta_dpo/mask_keep_frac': 1.0, 'eval_logits/chosen': -0.6977978944778442, 'eval_logits/rejected': -0.6668843626976013, 'epoch': 0.29}
29%|██▉ | 100/340 [04:40<10:22, 2.60s/it]
30%|██▉ | 101/340 [04:42<34:44, 8.72s/it]
30%|███ | 102/340 [04:45<27:17, 6.88s/it]
30%|███ | 103/340 [04:47<21:56, 5.56s/it]
31%|███ | 104/340 [04:50<18:20, 4.66s/it]
31%|███ | 105/340 [04:53<15:45, 4.02s/it]
{'loss': 0.4061, 'grad_norm': 15.665854454040527, 'learning_rate': 4.381713366536311e-07, 'beta_dpo/gap_mean': 7.8849334716796875, 'beta_dpo/gap_std': 13.30543041229248, 'beta_dpo/beta_used_raw': 0.15054886043071747, 'beta_dpo/beta_used': 0.15054886043071747, 'beta_dpo/mask_keep_frac': 0.762499988079071, 'logits/chosen': -0.7553393244743347, 'logits/rejected': -0.710943341255188, 'epoch': 0.31}
31%|███ | 106/340 [04:55<13:56, 3.57s/it]
31%|███▏ | 107/340 [04:58<12:42, 3.27s/it]
32%|███▏ | 108/340 [05:00<11:49, 3.06s/it]
32%|███▏ | 109/340 [05:03<11:16, 2.93s/it]
32%|███▏ | 110/340 [05:05<10:48, 2.82s/it]
{'loss': 0.4595, 'grad_norm': 7.982070446014404, 'learning_rate': 4.2947798076611047e-07, 'beta_dpo/gap_mean': 9.127924919128418, 'beta_dpo/gap_std': 13.331835746765137, 'beta_dpo/beta_used_raw': 0.09847154468297958, 'beta_dpo/beta_used': 0.09847154468297958, 'beta_dpo/mask_keep_frac': 0.75, 'logits/chosen': -0.7284727692604065, 'logits/rejected': -0.696673572063446, 'epoch': 0.32}
33%|███▎ | 111/340 [05:08<10:26, 2.73s/it]
33%|███▎ | 112/340 [05:10<09:42, 2.56s/it]
33%|███▎ | 113/340 [05:12<09:33, 2.52s/it]
34%|███▎ | 114/340 [05:15<09:32, 2.53s/it]
34%|███▍ | 115/340 [05:18<09:31, 2.54s/it]
{'loss': 0.3778, 'grad_norm': 36.213523864746094, 'learning_rate': 4.203117865141635e-07, 'beta_dpo/gap_mean': 9.942410469055176, 'beta_dpo/gap_std': 13.166864395141602, 'beta_dpo/beta_used_raw': 0.12598751485347748, 'beta_dpo/beta_used': 0.12598751485347748, 'beta_dpo/mask_keep_frac': 0.762499988079071, 'logits/chosen': -0.7145182490348816, 'logits/rejected': -0.6985291242599487, 'epoch': 0.34}
34%|███▍ | 116/340 [05:20<09:41, 2.60s/it]
34%|███▍ | 117/340 [05:23<09:39, 2.60s/it]
35%|███▍ | 118/340 [05:26<09:35, 2.59s/it]
35%|███▌ | 119/340 [05:28<09:43, 2.64s/it]
35%|███▌ | 120/340 [05:31<09:37, 2.62s/it]
{'loss': 0.5271, 'grad_norm': 14.08332347869873, 'learning_rate': 4.106969024216348e-07, 'beta_dpo/gap_mean': 10.542096138000488, 'beta_dpo/gap_std': 13.39216136932373, 'beta_dpo/beta_used_raw': 0.048566654324531555, 'beta_dpo/beta_used': 0.05509430170059204, 'beta_dpo/mask_keep_frac': 0.8125, 'logits/chosen': -0.7127692103385925, 'logits/rejected': -0.6740670204162598, 'epoch': 0.35}
36%|███▌ | 121/340 [05:33<09:32, 2.61s/it]
36%|███▌ | 122/340 [05:36<09:28, 2.61s/it]
36%|███▌ | 123/340 [05:39<09:29, 2.63s/it]
36%|███▋ | 124/340 [05:41<09:17, 2.58s/it]
37%|███▋ | 125/340 [05:43<08:57, 2.50s/it]
{'loss': 0.513, 'grad_norm': 1.2771987915039062, 'learning_rate': 4.006586590948141e-07, 'beta_dpo/gap_mean': 11.009790420532227, 'beta_dpo/gap_std': 13.461648941040039, 'beta_dpo/beta_used_raw': 0.05064947530627251, 'beta_dpo/beta_used': 0.05550508573651314, 'beta_dpo/mask_keep_frac': 0.824999988079071, 'logits/chosen': -0.7180671691894531, 'logits/rejected': -0.6869726777076721, 'epoch': 0.37}
37%|███▋ | 126/340 [05:46<08:52, 2.49s/it]
37%|███▋ | 127/340 [05:49<08:55, 2.51s/it]
38%|███▊ | 128/340 [05:51<08:56, 2.53s/it]
38%|███▊ | 129/340 [05:54<08:56, 2.54s/it]
38%|███▊ | 130/340 [05:56<08:56, 2.56s/it]
{'loss': 0.5068, 'grad_norm': 5.609388828277588, 'learning_rate': 3.9022350248844246e-07, 'beta_dpo/gap_mean': 11.50378704071045, 'beta_dpo/gap_std': 14.039319038391113, 'beta_dpo/beta_used_raw': 0.05528440326452255, 'beta_dpo/beta_used': 0.05528440326452255, 'beta_dpo/mask_keep_frac': 0.7749999761581421, 'logits/chosen': -0.7099085450172424, 'logits/rejected': -0.6715607643127441, 'epoch': 0.38}
39%|███▊ | 131/340 [05:59<08:39, 2.49s/it]
39%|███▉ | 132/340 [06:01<08:46, 2.53s/it]
39%|███▉ | 133/340 [06:04<08:39, 2.51s/it]
39%|███▉ | 134/340 [06:06<08:37, 2.51s/it]
40%|███▉ | 135/340 [06:09<08:39, 2.54s/it]
{'loss': 0.4231, 'grad_norm': 22.845937728881836, 'learning_rate': 3.794189242333106e-07, 'beta_dpo/gap_mean': 12.224153518676758, 'beta_dpo/gap_std': 15.014795303344727, 'beta_dpo/beta_used_raw': 0.08324670791625977, 'beta_dpo/beta_used': 0.08324670791625977, 'beta_dpo/mask_keep_frac': 0.7749999761581421, 'logits/chosen': -0.7034512758255005, 'logits/rejected': -0.6600346565246582, 'epoch': 0.4}
40%|████ | 136/340 [06:11<08:38, 2.54s/it]
40%|████ | 137/340 [06:14<08:25, 2.49s/it]
41%|████ | 138/340 [06:16<08:26, 2.51s/it]
41%|████ | 139/340 [06:19<08:21, 2.50s/it]
41%|████ | 140/340 [06:21<08:30, 2.55s/it]
{'loss': 0.3902, 'grad_norm': 31.461519241333008, 'learning_rate': 3.6827338920900253e-07, 'beta_dpo/gap_mean': 13.073277473449707, 'beta_dpo/gap_std': 15.834657669067383, 'beta_dpo/beta_used_raw': 0.10875506699085236, 'beta_dpo/beta_used': 0.10875506699085236, 'beta_dpo/mask_keep_frac': 0.75, 'logits/chosen': -0.6739553213119507, 'logits/rejected': -0.6366498470306396, 'epoch': 0.41}
41%|████▏ | 141/340 [06:24<08:30, 2.57s/it]
42%|████▏ | 142/340 [06:27<08:32, 2.59s/it]
42%|████▏ | 143/340 [06:29<08:29, 2.59s/it]
42%|████▏ | 144/340 [06:32<08:16, 2.54s/it]
43%|████▎ | 145/340 [06:34<08:20, 2.57s/it]
{'loss': 0.3819, 'grad_norm': 29.94761085510254, 'learning_rate': 3.568162605525952e-07, 'beta_dpo/gap_mean': 13.881492614746094, 'beta_dpo/gap_std': 16.42840003967285, 'beta_dpo/beta_used_raw': 0.10356837511062622, 'beta_dpo/beta_used': 0.10356837511062622, 'beta_dpo/mask_keep_frac': 0.7250000238418579, 'logits/chosen': -0.7220578193664551, 'logits/rejected': -0.6809359788894653, 'epoch': 0.43}
43%|████▎ | 146/340 [06:37<08:20, 2.58s/it]
43%|████▎ | 147/340 [06:40<08:21, 2.60s/it]
44%|████▎ | 148/340 [06:42<08:15, 2.58s/it]
44%|████▍ | 149/340 [06:45<08:11, 2.57s/it]
44%|████▍ | 150/340 [06:47<08:10, 2.58s/it]
{'loss': 0.607, 'grad_norm': 5.527612209320068, 'learning_rate': 3.4507772230088147e-07, 'beta_dpo/gap_mean': 14.227258682250977, 'beta_dpo/gap_std': 17.448183059692383, 'beta_dpo/beta_used_raw': -0.023198971524834633, 'beta_dpo/beta_used': 0.019714761525392532, 'beta_dpo/mask_keep_frac': 0.862500011920929, 'logits/chosen': -0.6326473355293274, 'logits/rejected': -0.6032054424285889, 'epoch': 0.44}
44%|████▍ | 151/340 [06:50<08:05, 2.57s/it]
45%|████▍ | 152/340 [06:52<08:03, 2.57s/it]
45%|████▌ | 153/340 [06:55<07:59, 2.57s/it]
45%|████▌ | 154/340 [06:58<08:01, 2.59s/it]
46%|████▌ | 155/340 [07:00<07:56, 2.57s/it]
{'loss': 0.4478, 'grad_norm': 23.10860824584961, 'learning_rate': 3.3308869986991487e-07, 'beta_dpo/gap_mean': 14.670598983764648, 'beta_dpo/gap_std': 18.554828643798828, 'beta_dpo/beta_used_raw': 0.06505511701107025, 'beta_dpo/beta_used': 0.07979521155357361, 'beta_dpo/mask_keep_frac': 0.699999988079071, 'logits/chosen': -0.7037164568901062, 'logits/rejected': -0.6613154411315918, 'epoch': 0.46}
46%|████▌ | 156/340 [07:02<07:43, 2.52s/it]
46%|████▌ | 157/340 [07:05<07:41, 2.52s/it]
46%|████▋ | 158/340 [07:07<07:37, 2.51s/it]
47%|████▋ | 159/340 [07:10<07:34, 2.51s/it]
47%|████▋ | 160/340 [07:13<07:36, 2.54s/it]
{'loss': 0.4758, 'grad_norm': 31.426233291625977, 'learning_rate': 3.208807785813777e-07, 'beta_dpo/gap_mean': 15.389450073242188, 'beta_dpo/gap_std': 19.081418991088867, 'beta_dpo/beta_used_raw': 0.040607184171676636, 'beta_dpo/beta_used': 0.06584476679563522, 'beta_dpo/mask_keep_frac': 0.8374999761581421, 'logits/chosen': -0.6574662923812866, 'logits/rejected': -0.630233883857727, 'epoch': 0.47}
47%|████▋ | 161/340 [07:15<07:35, 2.54s/it]
48%|████▊ | 162/340 [07:18<07:37, 2.57s/it]
48%|████▊ | 163/340 [07:20<07:37, 2.59s/it]
48%|████▊ | 164/340 [07:23<07:36, 2.59s/it]
49%|████▊ | 165/340 [07:26<07:32, 2.59s/it]
{'loss': 0.4768, 'grad_norm': 73.11759948730469, 'learning_rate': 3.084861204504122e-07, 'beta_dpo/gap_mean': 16.22821617126465, 'beta_dpo/gap_std': 19.637792587280273, 'beta_dpo/beta_used_raw': 0.07671914994716644, 'beta_dpo/beta_used': 0.08810704201459885, 'beta_dpo/mask_keep_frac': 0.800000011920929, 'logits/chosen': -0.6618590354919434, 'logits/rejected': -0.6243924498558044, 'epoch': 0.49}
49%|████▉ | 166/340 [07:28<07:19, 2.53s/it]
49%|████▉ | 167/340 [07:30<07:09, 2.48s/it]
49%|████▉ | 168/340 [07:33<07:10, 2.50s/it]
50%|████▉ | 169/340 [07:35<07:03, 2.47s/it]
50%|█████ | 170/340 [07:38<06:57, 2.46s/it]
{'loss': 0.5686, 'grad_norm': 0.5254238247871399, 'learning_rate': 2.959373794541426e-07, 'beta_dpo/gap_mean': 17.07744598388672, 'beta_dpo/gap_std': 20.277606964111328, 'beta_dpo/beta_used_raw': 0.02408101223409176, 'beta_dpo/beta_used': 0.02722100354731083, 'beta_dpo/mask_keep_frac': 0.7875000238418579, 'logits/chosen': -0.6966148614883423, 'logits/rejected': -0.666491687297821, 'epoch': 0.5}
50%|█████ | 171/340 [07:41<07:15, 2.57s/it]
51%|█████ | 172/340 [07:43<07:11, 2.57s/it]
51%|█████ | 173/340 [07:46<07:09, 2.57s/it]
51%|█████ | 174/340 [07:48<06:49, 2.47s/it]
51%|█████▏ | 175/340 [07:50<06:50, 2.49s/it]
{'loss': 0.4753, 'grad_norm': 0.5429490804672241, 'learning_rate': 2.8326761550411346e-07, 'beta_dpo/gap_mean': 17.654155731201172, 'beta_dpo/gap_std': 21.08226776123047, 'beta_dpo/beta_used_raw': 0.03722615912556648, 'beta_dpo/beta_used': 0.0664793998003006, 'beta_dpo/mask_keep_frac': 0.8125, 'logits/chosen': -0.6848665475845337, 'logits/rejected': -0.6613831520080566, 'epoch': 0.51}
52%|█████▏ | 176/340 [07:53<06:54, 2.53s/it]
52%|█████▏ | 177/340 [07:56<06:54, 2.55s/it]
52%|█████▏ | 178/340 [07:58<06:55, 2.56s/it]
53%|█████▎ | 179/340 [08:01<06:53, 2.57s/it]
53%|█████▎ | 180/340 [08:03<06:48, 2.55s/it]
{'loss': 0.5579, 'grad_norm': 15.737401962280273, 'learning_rate': 2.7051020734928443e-07, 'beta_dpo/gap_mean': 17.7331485748291, 'beta_dpo/gap_std': 22.08762550354004, 'beta_dpo/beta_used_raw': -0.008070843294262886, 'beta_dpo/beta_used': 0.024588093161582947, 'beta_dpo/mask_keep_frac': 0.824999988079071, 'logits/chosen': -0.6688377261161804, 'logits/rejected': -0.6460214853286743, 'epoch': 0.53}
53%|█████▎ | 181/340 [08:06<06:50, 2.58s/it]
54%|█████▎ | 182/340 [08:09<06:46, 2.57s/it]
54%|█████▍ | 183/340 [08:11<06:48, 2.60s/it]
54%|█████▍ | 184/340 [08:14<06:52, 2.65s/it]
54%|█████▍ | 185/340 [08:17<06:48, 2.63s/it]
{'loss': 0.4855, 'grad_norm': 0.506800651550293, 'learning_rate': 2.5769876463904263e-07, 'beta_dpo/gap_mean': 18.408456802368164, 'beta_dpo/gap_std': 22.61962890625, 'beta_dpo/beta_used_raw': 0.05483890324831009, 'beta_dpo/beta_used': 0.08162590861320496, 'beta_dpo/mask_keep_frac': 0.862500011920929, 'logits/chosen': -0.7215656042098999, 'logits/rejected': -0.6699239611625671, 'epoch': 0.54}
55%|█████▍ | 186/340 [08:19<06:43, 2.62s/it]
55%|█████▌ | 187/340 [08:22<06:40, 2.62s/it]
55%|█████▌ | 188/340 [08:24<06:34, 2.60s/it]
56%|█████▌ | 189/340 [08:27<06:24, 2.54s/it]
56%|█████▌ | 190/340 [08:29<06:24, 2.57s/it]
{'loss': 0.4949, 'grad_norm': 26.899166107177734, 'learning_rate': 2.4486703937790243e-07, 'beta_dpo/gap_mean': 18.814666748046875, 'beta_dpo/gap_std': 22.990680694580078, 'beta_dpo/beta_used_raw': 0.027242619544267654, 'beta_dpo/beta_used': 0.05661209672689438, 'beta_dpo/mask_keep_frac': 0.824999988079071, 'logits/chosen': -0.7019311785697937, 'logits/rejected': -0.6498968005180359, 'epoch': 0.56}
56%|█████▌ | 191/340 [08:32<06:23, 2.57s/it]
56%|█████▋ | 192/340 [08:34<06:14, 2.53s/it]
57%|█████▋ | 193/340 [08:37<06:14, 2.55s/it]
57%|█████▋ | 194/340 [08:40<06:15, 2.57s/it]
57%|█████▋ | 195/340 [08:42<06:13, 2.58s/it]
{'loss': 0.5286, 'grad_norm': 11.008431434631348, 'learning_rate': 2.320488370051681e-07, 'beta_dpo/gap_mean': 19.533567428588867, 'beta_dpo/gap_std': 23.629451751708984, 'beta_dpo/beta_used_raw': 0.001962479902431369, 'beta_dpo/beta_used': 0.02816765382885933, 'beta_dpo/mask_keep_frac': 0.824999988079071, 'logits/chosen': -0.7254117727279663, 'logits/rejected': -0.6765154004096985, 'epoch': 0.57}
58%|█████▊ | 196/340 [08:45<06:07, 2.55s/it]
58%|█████▊ | 197/340 [08:47<06:05, 2.56s/it]
58%|█████▊ | 198/340 [08:50<05:56, 2.51s/it]
59%|█████▊ | 199/340 [08:52<05:56, 2.52s/it]
59%|█████▉ | 200/340 [08:55<05:48, 2.49s/it]
{'loss': 0.5527, 'grad_norm': 78.41595458984375, 'learning_rate': 2.192779273338215e-07, 'beta_dpo/gap_mean': 20.06936264038086, 'beta_dpo/gap_std': 24.53436851501465, 'beta_dpo/beta_used_raw': -0.0015767127042636275, 'beta_dpo/beta_used': 0.0643467828631401, 'beta_dpo/mask_keep_frac': 0.862500011920929, 'logits/chosen': -0.6875912547111511, 'logits/rejected': -0.6458339095115662, 'epoch': 0.59}
59%|█████▉ | 200/340 [08:55<05:48, 2.49s/it][INFO|trainer.py:4307] 2026-04-10 22:19:17,214 >>
***** Running Evaluation *****
[INFO|trainer.py:4309] 2026-04-10 22:19:17,214 >> Num examples = 2339
[INFO|trainer.py:4312] 2026-04-10 22:19:17,214 >> Batch size = 16
100%|██████████| 18/18 [00:19<00:00, 1.12s/it]
{'eval_loss': 0.6420564651489258, 'eval_runtime': 20.3208, 'eval_samples_per_second': 115.104, 'eval_steps_per_second': 0.935, 'eval_beta_dpo/gap_mean': 17.105911254882812, 'eval_beta_dpo/gap_std': 25.945871353149414, 'eval_beta_dpo/beta_used_raw': -0.0800742357969284, 'eval_beta_dpo/beta_used': 0.03733323514461517, 'eval_beta_dpo/mask_keep_frac': 1.0, 'eval_logits/chosen': -0.6982784271240234, 'eval_logits/rejected': -0.6586927771568298, 'epoch': 0.59}
59%|█████▉ | 200/340 [09:15<05:48, 2.49s/it]
[INFO|trainer.py:3984] 2026-04-10 22:19:52,330 >> Saving model checkpoint to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-200
[INFO|configuration_utils.py:419] 2026-04-10 22:19:52,342 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-200/config.json
[INFO|configuration_utils.py:911] 2026-04-10 22:19:52,349 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-200/generation_config.json
[INFO|modeling_utils.py:3580] 2026-04-10 22:20:34,403 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 6 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-200/model.safetensors.index.json.
[INFO|tokenization_utils_base.py:2510] 2026-04-10 22:20:34,411 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-200/tokenizer_config.json
[INFO|tokenization_utils_base.py:2519] 2026-04-10 22:20:34,416 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-200/special_tokens_map.json
59%|█████▉ | 201/340 [13:22<3:09:39, 81.86s/it]
59%|█████▉ | 202/340 [13:24<2:13:26, 58.02s/it]
60%|█████▉ | 203/340 [13:26<1:34:20, 41.32s/it]
60%|██████ | 204/340 [13:29<1:07:17, 29.69s/it]
60%|██████ | 205/340 [13:32<48:30, 21.56s/it]
{'loss': 0.4449, 'grad_norm': 76.40715789794922, 'learning_rate': 2.065879555832674e-07, 'beta_dpo/gap_mean': 16.670331954956055, 'beta_dpo/gap_std': 27.034832000732422, 'beta_dpo/beta_used_raw': 0.18078216910362244, 'beta_dpo/beta_used': 0.18078216910362244, 'beta_dpo/mask_keep_frac': 0.887499988079071, 'logits/chosen': -0.6358317136764526, 'logits/rejected': -0.5890509486198425, 'epoch': 0.6}
61%|██████ | 206/340 [13:34<35:12, 15.76s/it]
61%|██████ | 207/340 [13:36<26:10, 11.81s/it]
61%|██████ | 208/340 [13:39<20:00, 9.10s/it]
61%|██████▏ | 209/340 [13:42<15:34, 7.13s/it]
62%|██████▏ | 210/340 [13:44<12:24, 5.73s/it]
{'loss': 0.4006, 'grad_norm': 39.985557556152344, 'learning_rate': 1.9401235374032425e-07, 'beta_dpo/gap_mean': 18.74222183227539, 'beta_dpo/gap_std': 27.3233642578125, 'beta_dpo/beta_used_raw': 0.187847301363945, 'beta_dpo/beta_used': 0.187847301363945, 'beta_dpo/mask_keep_frac': 0.7875000238418579, 'logits/chosen': -0.6837745308876038, 'logits/rejected': -0.6255474090576172, 'epoch': 0.62}
62%|██████▏ | 211/340 [13:47<10:17, 4.79s/it]
62%|██████▏ | 212/340 [13:49<08:48, 4.13s/it]
63%|██████▎ | 213/340 [13:52<07:47, 3.68s/it]
63%|██████▎ | 214/340 [13:55<07:03, 3.36s/it]
63%|██████▎ | 215/340 [13:57<06:32, 3.14s/it]
{'loss': 0.5414, 'grad_norm': 56.95214080810547, 'learning_rate': 1.8158425248197928e-07, 'beta_dpo/gap_mean': 20.168214797973633, 'beta_dpo/gap_std': 27.281606674194336, 'beta_dpo/beta_used_raw': 0.053963758051395416, 'beta_dpo/beta_used': 0.0615837462246418, 'beta_dpo/mask_keep_frac': 0.862500011920929, 'logits/chosen': -0.5969057083129883, 'logits/rejected': -0.5545859336853027, 'epoch': 0.63}
64%|██████▎ | 216/340 [14:00<06:10, 2.98s/it]
64%|██████▍ | 217/340 [14:02<05:49, 2.84s/it]
64%|██████▍ | 218/340 [14:05<05:43, 2.81s/it]
64%|██████▍ | 219/340 [14:08<05:32, 2.75s/it]
65%|██████▍ | 220/340 [14:10<05:23, 2.69s/it]
{'loss': 0.5464, 'grad_norm': 0.543950080871582, 'learning_rate': 1.6933639389195134e-07, 'beta_dpo/gap_mean': 20.327245712280273, 'beta_dpo/gap_std': 26.49213218688965, 'beta_dpo/beta_used_raw': 0.008795802481472492, 'beta_dpo/beta_used': 0.031995899975299835, 'beta_dpo/mask_keep_frac': 0.8374999761581421, 'logits/chosen': -0.6841639280319214, 'logits/rejected': -0.6511374711990356, 'epoch': 0.65}
65%|██████▌ | 221/340 [14:13<05:19, 2.69s/it]
65%|██████▌ | 222/340 [14:16<05:16, 2.68s/it]
66%|██████▌ | 223/340 [14:18<05:09, 2.65s/it]
66%|██████▌ | 224/340 [14:21<04:59, 2.58s/it]
66%|██████▌ | 225/340 [14:23<04:53, 2.56s/it]
{'loss': 0.4873, 'grad_norm': 29.658409118652344, 'learning_rate': 1.573010452010098e-07, 'beta_dpo/gap_mean': 20.5634765625, 'beta_dpo/gap_std': 25.834671020507812, 'beta_dpo/beta_used_raw': 0.016606144607067108, 'beta_dpo/beta_used': 0.04347361996769905, 'beta_dpo/mask_keep_frac': 0.7875000238418579, 'logits/chosen': -0.6632441282272339, 'logits/rejected': -0.6577039957046509, 'epoch': 0.66}
66%|██████▋ | 226/340 [14:26<04:52, 2.57s/it]
67%|██████▋ | 227/340 [14:28<04:48, 2.56s/it]
67%|██████▋ | 228/340 [14:31<04:47, 2.57s/it]
67%|██████▋ | 229/340 [14:33<04:46, 2.58s/it]
68%|██████▊ | 230/340 [14:36<04:45, 2.59s/it]
{'loss': 0.4797, 'grad_norm': 0.6241604685783386, 'learning_rate': 1.4550991377830423e-07, 'beta_dpo/gap_mean': 21.10856819152832, 'beta_dpo/gap_std': 25.58962059020996, 'beta_dpo/beta_used_raw': 0.05692853406071663, 'beta_dpo/beta_used': 0.06577815115451813, 'beta_dpo/mask_keep_frac': 0.737500011920929, 'logits/chosen': -0.7060235738754272, 'logits/rejected': -0.669354259967804, 'epoch': 0.68}
68%|██████▊ | 231/340 [14:39<04:41, 2.59s/it]
68%|██████▊ | 232/340 [14:41<04:41, 2.60s/it]
69%|██████▊ | 233/340 [14:44<04:38, 2.60s/it]
69%|██████▉ | 234/340 [14:46<04:35, 2.59s/it]
69%|██████▉ | 235/340 [14:49<04:34, 2.61s/it]
{'loss': 0.4357, 'grad_norm': 14.550406455993652, 'learning_rate': 1.339940635976592e-07, 'beta_dpo/gap_mean': 21.435104370117188, 'beta_dpo/gap_std': 25.414148330688477, 'beta_dpo/beta_used_raw': 0.023799167945981026, 'beta_dpo/beta_used': 0.06475953757762909, 'beta_dpo/mask_keep_frac': 0.800000011920929, 'logits/chosen': -0.6889506578445435, 'logits/rejected': -0.6716668009757996, 'epoch': 0.69}
69%|██████▉ | 236/340 [14:52<04:33, 2.63s/it]
70%|██████▉ | 237/340 [14:54<04:30, 2.63s/it]
70%|███████ | 238/340 [14:57<04:26, 2.61s/it]
70%|███████ | 239/340 [15:00<04:23, 2.61s/it]
71%|███████ | 240/340 [15:02<04:16, 2.57s/it]
{'loss': 0.5265, 'grad_norm': 11.02522087097168, 'learning_rate': 1.227838333989088e-07, 'beta_dpo/gap_mean': 21.869482040405273, 'beta_dpo/gap_std': 25.504459381103516, 'beta_dpo/beta_used_raw': 0.0025838587898761034, 'beta_dpo/beta_used': 0.022588472813367844, 'beta_dpo/mask_keep_frac': 0.737500011920929, 'logits/chosen': -0.6110752820968628, 'logits/rejected': -0.5741311311721802, 'epoch': 0.71}
71%|███████ | 241/340 [15:05<04:14, 2.57s/it]
71%|███████ | 242/340 [15:07<04:06, 2.51s/it]
71%|███████▏ | 243/340 [15:10<04:06, 2.54s/it]
72%|███████▏ | 244/340 [15:12<04:03, 2.54s/it]
72%|███████▏ | 245/340 [15:15<04:03, 2.57s/it]
{'loss': 0.5448, 'grad_norm': 0.5983785390853882, 'learning_rate': 1.1190875675987355e-07, 'beta_dpo/gap_mean': 22.568851470947266, 'beta_dpo/gap_std': 25.90200424194336, 'beta_dpo/beta_used_raw': -0.007365362253040075, 'beta_dpo/beta_used': 0.03154964745044708, 'beta_dpo/mask_keep_frac': 0.7875000238418579, 'logits/chosen': -0.6300492286682129, 'logits/rejected': -0.6109535098075867, 'epoch': 0.72}
72%|███████▏ | 246/340 [15:17<04:02, 2.58s/it]
73%|███████▎ | 247/340 [15:20<04:01, 2.60s/it]
73%|███████▎ | 248/340 [15:23<03:58, 2.59s/it]
73%|███████▎ | 249/340 [15:25<03:55, 2.59s/it]
74%|███████▎ | 250/340 [15:28<03:51, 2.57s/it]
{'loss': 0.629, 'grad_norm': 17.40310287475586, 'learning_rate': 1.0139748428955333e-07, 'beta_dpo/gap_mean': 22.215688705444336, 'beta_dpo/gap_std': 27.019912719726562, 'beta_dpo/beta_used_raw': -0.058729518204927444, 'beta_dpo/beta_used': 0.010828005149960518, 'beta_dpo/mask_keep_frac': 0.8374999761581421, 'logits/chosen': -0.6890392303466797, 'logits/rejected': -0.629682183265686, 'epoch': 0.74}
74%|███████▍ | 251/340 [15:30<03:49, 2.58s/it]
74%|███████▍ | 252/340 [15:33<03:47, 2.59s/it]
74%|███████▍ | 253/340 [15:35<03:42, 2.56s/it]
75%|███████▍ | 254/340 [15:38<03:46, 2.63s/it]
75%|███████▌ | 255/340 [15:41<03:42, 2.62s/it]
{'loss': 0.483, 'grad_norm': 53.0207405090332, 'learning_rate': 9.127770814751932e-08, 'beta_dpo/gap_mean': 22.695995330810547, 'beta_dpo/gap_std': 27.621633529663086, 'beta_dpo/beta_used_raw': 0.08087030053138733, 'beta_dpo/beta_used': 0.09269052743911743, 'beta_dpo/mask_keep_frac': 0.8125, 'logits/chosen': -0.6670210361480713, 'logits/rejected': -0.6118627786636353, 'epoch': 0.75}
75%|███████▌ | 256/340 [15:43<03:39, 2.61s/it]
76%|███████▌ | 257/340 [15:46<03:36, 2.61s/it]
76%|███████▌ | 258/340 [15:49<03:32, 2.60s/it]
76%|███████▌ | 259/340 [15:51<03:30, 2.59s/it]
76%|███████▋ | 260/340 [15:54<03:27, 2.60s/it]
{'loss': 0.4968, 'grad_norm': 0.6237814426422119, 'learning_rate': 8.15760890883607e-08, 'beta_dpo/gap_mean': 23.055011749267578, 'beta_dpo/gap_std': 28.25390625, 'beta_dpo/beta_used_raw': 0.03205912187695503, 'beta_dpo/beta_used': 0.06755250692367554, 'beta_dpo/mask_keep_frac': 0.762499988079071, 'logits/chosen': -0.6667768359184265, 'logits/rejected': -0.6239995956420898, 'epoch': 0.76}
77%|███████▋ | 261/340 [15:56<03:23, 2.58s/it]
77%|███████▋ | 262/340 [15:59<03:22, 2.60s/it]
77%|███████▋ | 263/340 [16:02<03:19, 2.59s/it]
78%|███████▊ | 264/340 [16:04<03:17, 2.60s/it]
78%|███████▊ | 265/340 [16:07<03:14, 2.59s/it]
{'loss': 0.4401, 'grad_norm': 93.24835205078125, 'learning_rate': 7.231818622338822e-08, 'beta_dpo/gap_mean': 22.97963523864746, 'beta_dpo/gap_std': 28.165149688720703, 'beta_dpo/beta_used_raw': 0.1062905341386795, 'beta_dpo/beta_used': 0.11417696624994278, 'beta_dpo/mask_keep_frac': 0.762499988079071, 'logits/chosen': -0.6425198316574097, 'logits/rejected': -0.6141684651374817, 'epoch': 0.78}
78%|███████▊ | 266/340 [16:09<03:13, 2.61s/it]
79%|███████▊ | 267/340 [16:12<03:12, 2.64s/it]
79%|███████▉ | 268/340 [16:15<03:08, 2.61s/it]
79%|███████▉ | 269/340 [16:17<03:03, 2.59s/it]
79%|███████▉ | 270/340 [16:20<03:02, 2.61s/it]
{'loss': 0.4832, 'grad_norm': 12.020166397094727, 'learning_rate': 6.352838968463919e-08, 'beta_dpo/gap_mean': 23.209665298461914, 'beta_dpo/gap_std': 28.643651962280273, 'beta_dpo/beta_used_raw': -0.0038310796953737736, 'beta_dpo/beta_used': 0.07874588668346405, 'beta_dpo/mask_keep_frac': 0.8125, 'logits/chosen': -0.6789681911468506, 'logits/rejected': -0.6184022426605225, 'epoch': 0.79}
80%|███████▉ | 271/340 [16:22<02:59, 2.60s/it]
80%|████████ | 272/340 [16:25<02:55, 2.58s/it]
80%|████████ | 273/340 [16:28<02:53, 2.58s/it]
81%|████████ | 274/340 [16:30<02:50, 2.59s/it]
81%|████████ | 275/340 [16:33<02:48, 2.59s/it]
{'loss': 0.5253, 'grad_norm': 0.6346384882926941, 'learning_rate': 5.5229856368582376e-08, 'beta_dpo/gap_mean': 24.013660430908203, 'beta_dpo/gap_std': 29.33469009399414, 'beta_dpo/beta_used_raw': -0.023246586322784424, 'beta_dpo/beta_used': 0.05330665037035942, 'beta_dpo/mask_keep_frac': 0.7250000238418579, 'logits/chosen': -0.6784375905990601, 'logits/rejected': -0.6448493599891663, 'epoch': 0.81}
81%|████████ | 276/340 [16:35<02:44, 2.57s/it]
81%|████████▏ | 277/340 [16:38<02:42, 2.58s/it]
82%|████████▏ | 278/340 [16:40<02:39, 2.58s/it]
82%|████████▏ | 279/340 [16:43<02:35, 2.55s/it]
82%|████████▏ | 280/340 [16:46<02:34, 2.57s/it]
{'loss': 0.53, 'grad_norm': 0.6082450151443481, 'learning_rate': 4.7444448928806615e-08, 'beta_dpo/gap_mean': 24.447540283203125, 'beta_dpo/gap_std': 29.648815155029297, 'beta_dpo/beta_used_raw': -0.010663707740604877, 'beta_dpo/beta_used': 0.05292302370071411, 'beta_dpo/mask_keep_frac': 0.762499988079071, 'logits/chosen': -0.6179937720298767, 'logits/rejected': -0.5764154195785522, 'epoch': 0.82}
83%|████████▎ | 281/340 [16:48<02:34, 2.62s/it]
83%|████████▎ | 282/340 [16:51<02:31, 2.61s/it]
83%|████████▎ | 283/340 [16:53<02:28, 2.61s/it]
84%|████████▎ | 284/340 [16:56<02:25, 2.60s/it]
84%|████████▍ | 285/340 [16:59<02:21, 2.57s/it]
{'loss': 0.6357, 'grad_norm': 0.6881201863288879, 'learning_rate': 4.019267817841834e-08, 'beta_dpo/gap_mean': 24.31735610961914, 'beta_dpo/gap_std': 29.43593406677246, 'beta_dpo/beta_used_raw': -0.07739663124084473, 'beta_dpo/beta_used': 0.007934780791401863, 'beta_dpo/mask_keep_frac': 0.762499988079071, 'logits/chosen': -0.6771946549415588, 'logits/rejected': -0.6086295247077942, 'epoch': 0.84}
84%|████████▍ | 286/340 [17:01<02:18, 2.57s/it]
84%|████████▍ | 287/340 [17:04<02:15, 2.56s/it]
85%|████████▍ | 288/340 [17:06<02:17, 2.64s/it]
85%|████████▌ | 289/340 [17:09<02:13, 2.62s/it]
85%|████████▌ | 290/340 [17:12<02:09, 2.60s/it]
{'loss': 0.5345, 'grad_norm': 2.793721914291382, 'learning_rate': 3.349364905389032e-08, 'beta_dpo/gap_mean': 24.635099411010742, 'beta_dpo/gap_std': 30.013864517211914, 'beta_dpo/beta_used_raw': 0.009315362200140953, 'beta_dpo/beta_used': 0.06074627488851547, 'beta_dpo/mask_keep_frac': 0.862500011920929, 'logits/chosen': -0.6379111409187317, 'logits/rejected': -0.5973175764083862, 'epoch': 0.85}
86%|████████▌ | 291/340 [17:14<02:07, 2.59s/it]
86%|████████▌ | 292/340 [17:17<02:04, 2.59s/it]
86%|████████▌ | 293/340 [17:19<02:01, 2.59s/it]
86%|████████▋ | 294/340 [17:22<01:57, 2.55s/it]
87%|████████▋ | 295/340 [17:24<01:55, 2.56s/it]
{'loss': 0.5441, 'grad_norm': 50.0855598449707, 'learning_rate': 2.736501028272095e-08, 'beta_dpo/gap_mean': 24.830781936645508, 'beta_dpo/gap_std': 30.81571388244629, 'beta_dpo/beta_used_raw': -0.01273317076265812, 'beta_dpo/beta_used': 0.03756168484687805, 'beta_dpo/mask_keep_frac': 0.7875000238418579, 'logits/chosen': -0.617714524269104, 'logits/rejected': -0.6301193237304688, 'epoch': 0.87}
87%|████████▋ | 296/340 [17:27<01:52, 2.56s/it]
87%|████████▋ | 297/340 [17:30<01:50, 2.56s/it]
88%|████████▊ | 298/340 [17:32<01:48, 2.58s/it]
88%|████████▊ | 299/340 [17:35<01:46, 2.61s/it]
88%|████████▊ | 300/340 [17:37<01:44, 2.61s/it]
{'loss': 0.5831, 'grad_norm': 0.6376844048500061, 'learning_rate': 2.1822907887504932e-08, 'beta_dpo/gap_mean': 24.904342651367188, 'beta_dpo/gap_std': 31.082351684570312, 'beta_dpo/beta_used_raw': 0.004354533273726702, 'beta_dpo/beta_used': 0.044209837913513184, 'beta_dpo/mask_keep_frac': 0.75, 'logits/chosen': -0.5964897274971008, 'logits/rejected': -0.6028931736946106, 'epoch': 0.88}
88%|████████▊ | 300/340 [17:37<01:44, 2.61s/it][INFO|trainer.py:4307] 2026-04-10 22:27:59,972 >>
***** Running Evaluation *****
[INFO|trainer.py:4309] 2026-04-10 22:27:59,972 >> Num examples = 2339
[INFO|trainer.py:4312] 2026-04-10 22:27:59,972 >> Batch size = 16
100%|██████████| 18/18 [00:19<00:00, 1.12s/it]
{'eval_loss': 0.6427361965179443, 'eval_runtime': 20.3459, 'eval_samples_per_second': 114.962, 'eval_steps_per_second': 0.934, 'eval_beta_dpo/gap_mean': 20.08871841430664, 'eval_beta_dpo/gap_std': 30.078739166259766, 'eval_beta_dpo/beta_used_raw': -0.11691396683454514, 'eval_beta_dpo/beta_used': 0.031669970601797104, 'eval_beta_dpo/mask_keep_frac': 1.0, 'eval_logits/chosen': -0.6625580191612244, 'eval_logits/rejected': -0.6206780672073364, 'epoch': 0.88}
88%|████████▊ | 300/340 [17:58<01:44, 2.61s/it]
89%|████████▊ | 301/340 [18:00<05:40, 8.72s/it]
89%|████████▉ | 302/340 [18:03<04:21, 6.87s/it]
89%|████████▉ | 303/340 [18:06<03:26, 5.58s/it]
89%|████████▉ | 304/340 [18:08<02:47, 4.66s/it]
90%|████████▉ | 305/340 [18:10<02:17, 3.94s/it]
{'loss': 0.5221, 'grad_norm': 89.4169692993164, 'learning_rate': 1.6881942648911074e-08, 'beta_dpo/gap_mean': 19.461414337158203, 'beta_dpo/gap_std': 29.93111801147461, 'beta_dpo/beta_used_raw': 0.20903603732585907, 'beta_dpo/beta_used': 0.20903603732585907, 'beta_dpo/mask_keep_frac': 0.824999988079071, 'logits/chosen': -0.6793561577796936, 'logits/rejected': -0.6282657384872437, 'epoch': 0.9}
90%|█████████ | 306/340 [18:13<01:59, 3.52s/it]
90%|█████████ | 307/340 [18:15<01:46, 3.23s/it]
91%|█████████ | 308/340 [18:18<01:37, 3.06s/it]
91%|█████████ | 309/340 [18:21<01:30, 2.93s/it]
91%|█████████ | 310/340 [18:23<01:24, 2.81s/it]
{'loss': 0.4927, 'grad_norm': 0.6243640780448914, 'learning_rate': 1.2555131639630567e-08, 'beta_dpo/gap_mean': 21.82315444946289, 'beta_dpo/gap_std': 30.26885414123535, 'beta_dpo/beta_used_raw': 0.13552138209342957, 'beta_dpo/beta_used': 0.1465708315372467, 'beta_dpo/mask_keep_frac': 0.800000011920929, 'logits/chosen': -0.5958537459373474, 'logits/rejected': -0.5621305704116821, 'epoch': 0.91}
91%|█████████▏| 311/340 [18:26<01:19, 2.74s/it]
92%|█████████▏| 312/340 [18:29<01:17, 2.75s/it]
92%|█████████▏| 313/340 [18:31<01:13, 2.71s/it]
92%|█████████▏| 314/340 [18:34<01:09, 2.68s/it]
93%|█████████▎| 315/340 [18:36<01:06, 2.65s/it]
{'loss': 0.5593, 'grad_norm': 0.5633993148803711, 'learning_rate': 8.85387393063622e-09, 'beta_dpo/gap_mean': 23.013385772705078, 'beta_dpo/gap_std': 30.935138702392578, 'beta_dpo/beta_used_raw': -0.010266167111694813, 'beta_dpo/beta_used': 0.06637457758188248, 'beta_dpo/mask_keep_frac': 0.7875000238418579, 'logits/chosen': -0.6413298845291138, 'logits/rejected': -0.6052228808403015, 'epoch': 0.93}
93%|█████████▎| 316/340 [18:39<01:02, 2.60s/it]
93%|█████████▎| 317/340 [18:41<00:59, 2.59s/it]
94%|█████████▎| 318/340 [18:44<00:56, 2.59s/it]
94%|█████████▍| 319/340 [18:47<00:54, 2.60s/it]
94%|█████████▍| 320/340 [18:49<00:51, 2.58s/it]
{'loss': 0.6832, 'grad_norm': 0.6485550999641418, 'learning_rate': 5.7879205600998296e-09, 'beta_dpo/gap_mean': 22.677587509155273, 'beta_dpo/gap_std': 31.181507110595703, 'beta_dpo/beta_used_raw': -0.06694652885198593, 'beta_dpo/beta_used': 0.0010000000474974513, 'beta_dpo/mask_keep_frac': 0.875, 'logits/chosen': -0.6589199304580688, 'logits/rejected': -0.6012631058692932, 'epoch': 0.94}
94%|█████████▍| 321/340 [18:52<00:49, 2.58s/it]
95%|█████████▍| 322/340 [18:54<00:46, 2.59s/it]
95%|█████████▌| 323/340 [18:57<00:43, 2.59s/it]
95%|█████████▌| 324/340 [19:00<00:41, 2.59s/it]
96%|█████████▌| 325/340 [19:02<00:39, 2.63s/it]
{'loss': 0.6068, 'grad_norm': 0.5991944670677185, 'learning_rate': 3.3653488440851253e-09, 'beta_dpo/gap_mean': 23.140369415283203, 'beta_dpo/gap_std': 31.43625259399414, 'beta_dpo/beta_used_raw': -0.054052434861660004, 'beta_dpo/beta_used': 0.03462111949920654, 'beta_dpo/mask_keep_frac': 0.8500000238418579, 'logits/chosen': -0.6647250652313232, 'logits/rejected': -0.6088197231292725, 'epoch': 0.96}
96%|█████████▌| 326/340 [19:05<00:36, 2.60s/it]
96%|█████████▌| 327/340 [19:07<00:33, 2.55s/it]
96%|█████████▋| 328/340 [19:10<00:30, 2.56s/it]
97%|█████████▋| 329/340 [19:12<00:27, 2.53s/it]
97%|█████████▋| 330/340 [19:15<00:25, 2.56s/it]
{'loss': 0.4828, 'grad_norm': 0.614765465259552, 'learning_rate': 1.592541096695571e-09, 'beta_dpo/gap_mean': 24.296361923217773, 'beta_dpo/gap_std': 31.577083587646484, 'beta_dpo/beta_used_raw': 0.029860854148864746, 'beta_dpo/beta_used': 0.03790256381034851, 'beta_dpo/mask_keep_frac': 0.7124999761581421, 'logits/chosen': -0.6624591946601868, 'logits/rejected': -0.62751704454422, 'epoch': 0.97}
97%|█████████▋| 331/340 [19:17<00:23, 2.56s/it]
98%|█████████▊| 332/340 [19:20<00:20, 2.61s/it]
98%|█████████▊| 333/340 [19:23<00:18, 2.61s/it]
98%|█████████▊| 334/340 [19:25<00:15, 2.61s/it]
99%|█████████▊| 335/340 [19:28<00:13, 2.60s/it]
{'loss': 0.5653, 'grad_norm': 25.446868896484375, 'learning_rate': 4.741678157389739e-10, 'beta_dpo/gap_mean': 24.669193267822266, 'beta_dpo/gap_std': 30.901264190673828, 'beta_dpo/beta_used_raw': -0.03980039432644844, 'beta_dpo/beta_used': 0.02071220614016056, 'beta_dpo/mask_keep_frac': 0.8125, 'logits/chosen': -0.6353505849838257, 'logits/rejected': -0.590802788734436, 'epoch': 0.99}
99%|█████████▉| 336/340 [19:30<00:10, 2.56s/it]
99%|█████████▉| 337/340 [19:33<00:07, 2.55s/it]
99%|█████████▉| 338/340 [19:35<00:05, 2.54s/it]
100%|█████████▉| 339/340 [19:38<00:02, 2.51s/it]
100%|██████████| 340/340 [19:41<00:00, 2.53s/it]
{'loss': 0.6244, 'grad_norm': 0.6792064309120178, 'learning_rate': 1.31753782067201e-11, 'beta_dpo/gap_mean': 25.268396377563477, 'beta_dpo/gap_std': 30.97623062133789, 'beta_dpo/beta_used_raw': -0.04880411922931671, 'beta_dpo/beta_used': 0.04662991315126419, 'beta_dpo/mask_keep_frac': 0.75, 'logits/chosen': -0.6614812016487122, 'logits/rejected': -0.6312215924263, 'epoch': 1.0}
100%|██████████| 340/340 [19:41<00:00, 2.53s/it][INFO|trainer.py:3984] 2026-04-10 22:30:17,890 >> Saving model checkpoint to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-340
[INFO|configuration_utils.py:419] 2026-04-10 22:30:17,897 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-340/config.json
[INFO|configuration_utils.py:911] 2026-04-10 22:30:17,902 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-340/generation_config.json
[INFO|modeling_utils.py:3580] 2026-04-10 22:31:00,072 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 6 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-340/model.safetensors.index.json.
[INFO|tokenization_utils_base.py:2510] 2026-04-10 22:31:00,084 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-340/tokenizer_config.json
[INFO|tokenization_utils_base.py:2519] 2026-04-10 22:31:00,089 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/checkpoint-340/special_tokens_map.json
[INFO|trainer.py:2681] 2026-04-10 22:34:15,986 >>
Training completed. Do not forget to share your model on huggingface.co/models =)
{'train_runtime': 1440.2657, 'train_samples_per_second': 30.271, 'train_steps_per_second': 0.236, 'train_loss': 0.5267414394546958, 'epoch': 1.0}
100%|██████████| 340/340 [23:53<00:00, 2.53s/it]
100%|██████████| 340/340 [23:53<00:00, 4.22s/it]
***** train metrics *****
epoch = 1.0
total_flos = 0GF
train_loss = 0.5267
train_runtime = 0:24:00.26
train_samples = 43598
train_samples_per_second = 30.271
train_steps_per_second = 0.236
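As a consistency check on the table above: 43598 samples / 1440.27 s ≈ 30.27 samples/s and 340 steps / 1440.27 s ≈ 0.236 steps/s, matching the reported throughput. Assuming a global batch of 128 preference pairs (16 per device across the 8 GPUs in the run name), 340 × 128 = 43520 ≈ 43598, i.e. one epoch with the final partial batch dropped.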
2026-04-10 22:34:16 - INFO - __main__ - *** Training complete ***
2026-04-10 22:34:16 - INFO - __main__ - *** Save model ***
[INFO|configuration_utils.py:419] 2026-04-10 22:34:32,873 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/config.json
[INFO|configuration_utils.py:911] 2026-04-10 22:34:32,876 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/generation_config.json
[INFO|modeling_utils.py:3580] 2026-04-10 22:35:18,560 >> The model is bigger than the maximum size per checkpoint (5GB) and is going to be split in 7 checkpoint shards. You can find where each parameters has been saved in the index located at /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/model.safetensors.index.json.
[INFO|tokenization_utils_base.py:2510] 2026-04-10 22:35:18,567 >> tokenizer config file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/tokenizer_config.json
[INFO|tokenization_utils_base.py:2519] 2026-04-10 22:35:18,571 >> Special tokens file saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/special_tokens_map.json
2026-04-10 22:35:18 - INFO - __main__ - Saved HF-compatible model artifacts to /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627
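Given the artifacts saved above (sharded safetensors plus index, tokenizer config, special-tokens map), a standard reload for downstream use is shown below. This uses the stock transformers API, with bfloat16 matching the training dtype; device_map="auto" is an optional convenience that requires accelerate.

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Output directory taken from the log lines above.
    ckpt = "/scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627"

    tokenizer = AutoTokenizer.from_pretrained(ckpt)
    model = AutoModelForCausalLM.from_pretrained(
        ckpt, torch_dtype=torch.bfloat16, device_map="auto"
    )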
[INFO|modelcard.py:450] 2026-04-10 22:35:18,790 >> Dropping the following result as it does not have all the necessary fields:
{'dataset': {'name': 'Anthropic/hh-rlhf', 'type': 'Anthropic/hh-rlhf'}}
[INFO|configuration_utils.py:419] 2026-04-10 22:35:18,802 >> Configuration saved in /scratch/feng.yulu/dynamic-dpo-v4/outputs/llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627/config.json
2026-04-10 22:35:18 - INFO - __main__ - *** Evaluate ***
[INFO|trainer.py:4307] 2026-04-10 22:35:18,803 >>
***** Running Evaluation *****
[INFO|trainer.py:4309] 2026-04-10 22:35:18,803 >> Num examples = 2339
[INFO|trainer.py:4312] 2026-04-10 22:35:18,803 >> Batch size = 16
0%| | 0/18 [00:00<?, ?it/s]
11%|█ | 2/18 [00:01<00:08, 1.78it/s]
17%|█▋ | 3/18 [00:02<00:11, 1.29it/s]
22%|██▏ | 4/18 [00:03<00:12, 1.10it/s]
28%|██▊ | 5/18 [00:04<00:12, 1.01it/s]
33%|███▎ | 6/18 [00:05<00:12, 1.04s/it]
39%|███▉ | 7/18 [00:06<00:11, 1.07s/it]
44%|████▍ | 8/18 [00:07<00:10, 1.09s/it]
50%|█████ | 9/18 [00:08<00:09, 1.10s/it]
56%|█████▌ | 10/18 [00:10<00:08, 1.11s/it]
61%|██████ | 11/18 [00:11<00:07, 1.10s/it]
67%|██████▋ | 12/18 [00:12<00:06, 1.12s/it]
72%|███████▏ | 13/18 [00:13<00:05, 1.12s/it]
78%|███████▊ | 14/18 [00:14<00:04, 1.13s/it]
83%|████████▎ | 15/18 [00:15<00:03, 1.13s/it]
89%|████████▉ | 16/18 [00:16<00:02, 1.13s/it]
94%|█████████▍| 17/18 [00:18<00:01, 1.13s/it]
100%|██████████| 18/18 [00:19<00:00, 1.12s/it]
100%|██████████| 18/18 [00:19<00:00, 1.06s/it]
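The 18 evaluation steps above are roughly consistent with 2339 examples at a per-device batch of 16: assuming the 8 data-parallel workers implied by the run name, 2339 / (16 × 8) ≈ 18.3, which matches the 18 full batches shown (how the trailing partial batch is handled is an assumption about the dataloader settings).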
***** eval metrics *****
epoch = 1.0
eval_beta_dpo/beta_used = 0.0253
eval_beta_dpo/beta_used_raw = -0.1409
eval_beta_dpo/gap_mean = 20.5226
eval_beta_dpo/gap_std = 30.1903
eval_beta_dpo/mask_keep_frac = 1.0
eval_logits/chosen = -0.6686
eval_logits/rejected = -0.6267
eval_loss = 0.6458
eval_runtime = 0:00:20.23
eval_samples = 2339
eval_samples_per_second = 115.585
eval_steps_per_second = 0.939
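For scale: a policy with zero reward margin scores -log sigmoid(0) = ln 2 ≈ 0.6931 on the standard DPO objective, so the eval_loss of 0.6458 above sits only modestly below that no-preference baseline.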
2026-04-10 22:35:39 - INFO - __main__ - *** Training complete! ***
wandb: 0.047 MB of 0.047 MB uploaded
wandb:
wandb: Run history:
wandb: eval/beta_dpo/beta_used █▆▃▁
wandb: eval/beta_dpo/beta_used_raw █▄▂▁
wandb: eval/beta_dpo/gap_mean ▁▆██
wandb: eval/beta_dpo/gap_std ▁▆██
wandb: eval/beta_dpo/mask_keep_frac ▁▁▁▁
wandb: eval/logits/chosen ▁▁█▇
wandb: eval/logits/rejected ▁▂█▇
wandb: eval/loss ▁▇▇█
wandb: eval/runtime █▄▅▁
wandb: eval/samples_per_second ▁▅▄█
wandb: eval/steps_per_second ▁▅▄█
wandb: train/beta_dpo/beta_used ▄▄▄▅▅▅▅▄▅▄▃▃▅▄▃▃▄▄▄▄▂▂▃▃▇▃▂▃▂▁▃▄▃▁▂█▆▁▂▃
wandb: train/beta_dpo/beta_used_raw ▅▅▅▅▆▆▆▅▆▅▄▄▆▅▄▄▅▅▄▅▃▃▄▃▇▄▃▃▃▁▄▃▂▁▃█▆▁▄▂
wandb: train/beta_dpo/gap_mean ▁▁▁▁▁▁▂▂▃▃▃▃▄▄▄▄▄▅▅▅▆▆▆▇▆▇▇▇▇▇▇▇███▆▇▇██
wandb: train/beta_dpo/gap_std ▁▁▁▁▁▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▇▇▇▇▇▇▇▇████████
wandb: train/beta_dpo/mask_keep_frac █▃▅▇▃▃▂▂▅▄▄▄▃▂▄▃▃▂▁▄▄▅▅▆▇▆▄▄▂▅▃▄▂▃▄▅▄▆▁▂
wandb: train/epoch ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
wandb: train/global_step ▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇████
wandb: train/grad_norm ▃▃▃▃▂▃▃▃▃▃▁▂▄▂▂▁▃▃▃▇▁▂▃▇▇▅▃▂▂▂▁▂▁▁▅█▁▁▁▁
wandb: train/learning_rate ▁▂▄▆▇██████▇▇▇▇▆▆▆▆▅▅▅▄▄▄▄▃▃▃▂▂▂▂▂▁▁▁▁▁▁
wandb: train/logits/chosen █▇▆▇▆▄▃▂▃▁▂▂▃▁▂▂▂▂▂▃▂▃▂▃▄▅▃▂▄▂▃▃▃▃▄▃▅▃▃▃
wandb: train/logits/rejected █▇▆▇▆▄▃▂▃▁▂▂▃▂▂▂▃▂▃▃▂▃▃▃▄▅▃▂▅▃▃▄▃▄▃▃▅▄▃▃
wandb: train/loss ████▇▅▄▄▃▄▄▄▂▃▄▄▂▁▂▃▅▅▄▅▂▅▃▂▄▇▄▃▄▇▅▄▃█▃▆
wandb:
wandb: Run summary:
wandb: eval/beta_dpo/beta_used 0.02526
wandb: eval/beta_dpo/beta_used_raw -0.14089
wandb: eval/beta_dpo/gap_mean 20.52261
wandb: eval/beta_dpo/gap_std 30.19027
wandb: eval/beta_dpo/mask_keep_frac 1.0
wandb: eval/logits/chosen -0.66861
wandb: eval/logits/rejected -0.62674
wandb: eval/loss 0.64585
wandb: eval/runtime 20.2363
wandb: eval/samples_per_second 115.585
wandb: eval/steps_per_second 0.939
wandb: total_flos 0.0
wandb: train/beta_dpo/beta_used 0.04663
wandb: train/beta_dpo/beta_used_raw -0.0488
wandb: train/beta_dpo/gap_mean 25.2684
wandb: train/beta_dpo/gap_std 30.97623
wandb: train/beta_dpo/mask_keep_frac 0.75
wandb: train/epoch 1.0
wandb: train/global_step 340
wandb: train/grad_norm 0.67921
wandb: train/learning_rate 0.0
wandb: train/logits/chosen -0.66148
wandb: train/logits/rejected -0.63122
wandb: train/loss 0.6244
wandb: train_loss 0.52674
wandb: train_runtime 1440.2657
wandb: train_samples_per_second 30.271
wandb: train_steps_per_second 0.236
wandb:
wandb: 🚀 View run llama-3-8b-base-beta-dpo-hh-helpful-8xh200-20260410-215627 at: https://wandb.ai/can-not-fand-northeastern-university/huggingface/runs/54i2is22
wandb: ⭐️ View project at: https://wandb.ai/can-not-fand-northeastern-university/huggingface
wandb: Synced 6 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)
wandb: Find logs at: /scratch/feng.yulu/dynamic-dpo-v4/wandb/wandb/run-20260410_221018-54i2is22/logs
wandb: WARNING The new W&B backend becomes opt-out in version 0.18.0; try it out with `wandb.require("core")`! See https://wandb.me/wandb-core for more information.
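Acting on that warning is a one-liner; wandb.require("core") is the opt-in named in the message itself and must run before wandb.init. A minimal example, with the project name taken from the run URL above:

    import wandb

    wandb.require("core")  # opt in to the new W&B backend ahead of 0.18.0
    run = wandb.init(project="huggingface")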